diff --git a/.circleci/build_docs/commit_docs.sh b/.circleci/build_docs/commit_docs.sh new file mode 100755 index 00000000000..b923b0edbc4 --- /dev/null +++ b/.circleci/build_docs/commit_docs.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash + +set -ex + + +if [ "$2" == "" ]; then + echo call as "$0" "" "" + echo where src is the root of the built documentation git checkout and + echo branch should be "master" or "1.7" or so + exit 1 +fi + +src=$1 +target=$2 + +echo "committing docs from ${src} to ${target}" + +pushd "${src}" +git checkout gh-pages +mkdir -p ./"${target}" +rm -rf ./"${target}"/* +cp -r "${src}/docs/build/html/"* ./"$target" +if [ "${target}" == "master" ]; then + mkdir -p ./_static + rm -rf ./_static/* + cp -r "${src}/docs/build/html/_static/"* ./_static + git add --all ./_static || true +fi +git add --all ./"${target}" || true +git config user.email "soumith+bot@pytorch.org" +git config user.name "pytorchbot" +# If there aren't changes, don't make a commit; push is no-op +git commit -m "auto-generating sphinx docs" || true +git remote add https https://github.com/pytorch/vision.git +git push -u https gh-pages diff --git a/.circleci/config.yml b/.circleci/config.yml index 7e754846023..c12f0b731c4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -6,14 +6,17 @@ version: 2.1 # - Replace binary_linux_wheel_py3.7 with the name of the job you want to test. # Job names are 'name:' key. -orbs: - win: circleci/windows@2.0.0 - executors: - windows-gpu-prototype: + windows-cpu: + machine: + resource_class: windows.xlarge + image: windows-server-2019-vs2019:stable + shell: bash.exe + + windows-gpu: machine: - resource_class: windows.gpu.small.prototype - image: windows-server-2019-nvidia:201908-28 + resource_class: windows.gpu.nvidia.medium + image: windows-server-2019-nvidia:stable shell: bash.exe commands: @@ -30,10 +33,83 @@ commands: # git fetch --force origin ${CIRCLE_BRANCH}/merge:merged/${CIRCLE_BRANCH} # git checkout "merged/$CIRCLE_BRANCH" # fi + designate_upload_channel: + description: "inserts the correct upload channel into ${BASH_ENV}" + steps: + - run: + name: adding UPLOAD_CHANNEL to BASH_ENV + command: | + our_upload_channel=nightly + # On tags upload to test instead + if [[ -n "${CIRCLE_TAG}" ]]; then + our_upload_channel=test + fi + echo "export UPLOAD_CHANNEL=${our_upload_channel}" >> ${BASH_ENV} + install_cuda_compatible_cmath: + description: "Install CUDA compatible cmath" + steps: + - run: + name: _HACK_ Install CUDA compatible cmath + no_output_timeout: 1m + command: | + powershell .circleci/scripts/vs_install_cmath.ps1 + + brew_update: + description: "Update Homebrew and install base formulae" + steps: + - run: + name: Update Homebrew + no_output_timeout: "10m" + command: | + set -ex + + # Update repositories manually. + # Running `brew update` produces a comparison between the + # current checkout and the updated checkout, which takes a + # very long time because the existing checkout is 2y old. + for path in $(find /usr/local/Homebrew -type d -name .git) + do + cd $path/.. + git fetch --depth=1 origin + git reset --hard origin/master + done + + export HOMEBREW_NO_AUTO_UPDATE=1 + + # Install expect and moreutils so that we can call `unbuffer` and `ts`. + # moreutils installs a `parallel` executable by default, which conflicts + # with the executable from the GNU `parallel`, so we must unlink GNU + # `parallel` first, and relink it afterwards. + brew install coreutils + brew unlink parallel + brew install moreutils + brew link parallel --overwrite + brew install expect + + brew_install: + description: "Install Homebrew formulae" + parameters: + formulae: + type: string + default: "" + steps: + - run: + name: Install << parameters.formulae >> + no_output_timeout: "10m" + command: | + set -ex + export HOMEBREW_NO_AUTO_UPDATE=1 + brew install << parameters.formulae >> + + run_brew_for_ios_build: + steps: + - brew_update + - brew_install: + formulae: libtool binary_common: &binary_common parameters: - # Edit these defaults to do a release` + # Edit these defaults to do a release build_version: description: "version number of release binary; by default, build a nightly" type: string @@ -49,6 +125,7 @@ binary_common: &binary_common cu_version: description: "CUDA version to build against, in CU format (e.g., cpu or cu100)" type: string + default: "cpu" unicode_abi: description: "Python 2.7 wheel only: whether or not we are cp27mu (default: no)" type: string @@ -56,14 +133,38 @@ binary_common: &binary_common wheel_docker_image: description: "Wheel only: what docker image to use" type: string - default: "soumith/manylinux-cuda101" + default: "pytorch/manylinux-cuda101" + conda_docker_image: + description: "Conda only: what docker image to use" + type: string + default: "pytorch/conda-builder:cpu" environment: PYTHON_VERSION: << parameters.python_version >> - BUILD_VERSION: << parameters.build_version >> PYTORCH_VERSION: << parameters.pytorch_version >> UNICODE_ABI: << parameters.unicode_abi >> CU_VERSION: << parameters.cu_version >> +torchvision_ios_params: &torchvision_ios_params + parameters: + build_environment: + type: string + default: "" + ios_arch: + type: string + default: "" + ios_platform: + type: string + default: "" + environment: + BUILD_ENVIRONMENT: << parameters.build_environment >> + IOS_ARCH: << parameters.ios_arch >> + IOS_PLATFORM: << parameters.ios_platform >> + +smoke_test_common: &smoke_test_common + <<: *binary_common + docker: + - image: torchvision/smoke_test:latest + jobs: circleci_consistency: docker: @@ -76,6 +177,78 @@ jobs: python .circleci/regenerate.py git diff --exit-code || (echo ".circleci/config.yml not in sync with config.yml.in! Run .circleci/regenerate.py to update config"; exit 1) + python_lint: + docker: + - image: circleci/python:3.7 + steps: + - checkout + - run: + command: | + pip install --user --progress-bar off flake8 typing + flake8 --config=setup.cfg . + + python_type_check: + docker: + - image: circleci/python:3.7 + steps: + - checkout + - run: + command: | + sudo apt-get update -y + sudo apt install -y libturbojpeg-dev + pip install --user --progress-bar off mypy + pip install --user --progress-bar off --pre torch -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html + pip install --user --progress-bar off --editable . + mypy --config-file mypy.ini + + docstring_parameters_sync: + docker: + - image: circleci/python:3.7 + steps: + - checkout + - run: + command: | + pip install --user pydocstyle + pydocstyle + + clang_format: + docker: + - image: circleci/python:3.7 + steps: + - checkout + - run: + command: | + curl https://oss-clang-format.s3.us-east-2.amazonaws.com/linux64/clang-format-linux64 -o clang-format + chmod +x clang-format + sudo mv clang-format /opt/clang-format + ./.circleci/unittest/linux/scripts/run-clang-format.py -r torchvision/csrc --clang-format-executable /opt/clang-format + + torchhub_test: + docker: + - image: circleci/python:3.7 + steps: + - checkout + - run: + command: | + pip install --user --progress-bar off --pre torch -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html + # need to install torchvision dependencies due to transitive imports + pip install --user --progress-bar off --editable . + python test/test_hub.py + + torch_onnx_test: + docker: + - image: circleci/python:3.7 + steps: + - checkout + - run: + command: | + pip install --user --progress-bar off --pre torch -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html + # need to install torchvision dependencies due to transitive imports + pip install --user --progress-bar off --editable . + pip install --user onnx + pip install --user onnxruntime + python test/test_onnx.py + binary_linux_wheel: <<: *binary_common docker: @@ -83,6 +256,7 @@ jobs: resource_class: 2xlarge+ steps: - checkout_merge + - designate_upload_channel - run: packaging/build_wheel.sh - store_artifacts: path: dist @@ -94,10 +268,11 @@ jobs: binary_linux_conda: <<: *binary_common docker: - - image: "soumith/conda-cuda" + - image: "<< parameters.conda_docker_image >>" resource_class: 2xlarge+ steps: - checkout_merge + - designate_upload_channel - run: packaging/build_conda.sh - store_artifacts: path: /opt/conda/conda-bld/linux-64 @@ -105,112 +280,67 @@ jobs: root: /opt/conda/conda-bld/linux-64 paths: - "*" - - binary_linux_conda_cuda: - <<: *binary_common - machine: - image: ubuntu-1604:201903-01 - resource_class: gpu.medium - steps: - - checkout_merge - - run: - name: Setup environment - command: | - set -e - - curl -L https://packagecloud.io/circleci/trusty/gpgkey | sudo apt-key add - - curl -L https://dl.google.com/linux/linux_signing_key.pub | sudo apt-key add - - - sudo apt-get update - - sudo apt-get install \ - apt-transport-https \ - ca-certificates \ - curl \ - gnupg-agent \ - software-properties-common - - curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - - - sudo add-apt-repository \ - "deb [arch=amd64] https://download.docker.com/linux/ubuntu \ - $(lsb_release -cs) \ - stable" - - sudo apt-get update - export DOCKER_VERSION="5:19.03.2~3-0~ubuntu-xenial" - sudo apt-get install docker-ce=${DOCKER_VERSION} docker-ce-cli=${DOCKER_VERSION} containerd.io=1.2.6-3 - - # Add the package repositories - distribution=$(. /etc/os-release;echo $ID$VERSION_ID) - curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - - curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list - - export NVIDIA_CONTAINER_VERSION="1.0.3-1" - sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit=${NVIDIA_CONTAINER_VERSION} - sudo systemctl restart docker - - DRIVER_FN="NVIDIA-Linux-x86_64-410.104.run" - wget "https://s3.amazonaws.com/ossci-linux/nvidia_driver/$DRIVER_FN" - sudo /bin/bash "$DRIVER_FN" -s --no-drm || (sudo cat /var/log/nvidia-installer.log && false) - nvidia-smi - - - run: - name: Pull docker image - command: | - set -e - export DOCKER_IMAGE=soumith/conda-cuda - echo Pulling docker image $DOCKER_IMAGE - docker pull $DOCKER_IMAGE >/dev/null - - - run: - name: Build and run tests - command: | - set -e - - cd ${HOME}/project/ - - export DOCKER_IMAGE=soumith/conda-cuda - export VARS_TO_PASS="-e PYTHON_VERSION -e BUILD_VERSION -e PYTORCH_VERSION -e UNICODE_ABI -e CU_VERSION" - - docker run --gpus all --ipc=host -v $(pwd):/remote -w /remote ${VARS_TO_PASS} ${DOCKER_IMAGE} ./packaging/build_conda.sh + - store_test_results: + path: build_results/ binary_win_conda: <<: *binary_common - executor: - name: win/default - shell: bash.exe + executor: windows-cpu steps: - checkout_merge + - designate_upload_channel + - install_cuda_compatible_cmath - run: + name: Build conda packages + no_output_timeout: 20m command: | - choco install miniconda3 - (& "C:\tools\miniconda3\Scripts\conda.exe" "shell.powershell" "hook") | Out-String | Invoke-Expression + set -ex + source packaging/windows/internal/vc_install_helper.sh + packaging/windows/internal/cuda_install.bat + eval "$('/C/tools/miniconda3/Scripts/conda.exe' 'shell.bash' 'hook')" conda activate base conda install -yq conda-build "conda-package-handling!=1.5.0" - bash packaging/build_conda.sh - shell: powershell.exe + packaging/build_conda.sh + rm /C/tools/miniconda3/conda-bld/win-64/vs${VC_YEAR}*.tar.bz2 + - store_artifacts: + path: C:/tools/miniconda3/conda-bld/win-64 + - persist_to_workspace: + root: C:/tools/miniconda3/conda-bld/win-64 + paths: + - "*" + - store_test_results: + path: build_results/ - binary_win_conda_cuda: + binary_win_wheel: <<: *binary_common - executor: windows-gpu-prototype + executor: windows-cpu steps: - checkout_merge + - designate_upload_channel + - install_cuda_compatible_cmath - run: + name: Build wheel packages command: | - choco install miniconda3 - (& "C:\tools\miniconda3\Scripts\conda.exe" "shell.powershell" "hook") | Out-String | Invoke-Expression - conda activate base - conda install -yq conda-build "conda-package-handling!=1.5.0" - bash packaging/build_conda.sh - shell: powershell.exe + set -ex + source packaging/windows/internal/vc_install_helper.sh + packaging/windows/internal/cuda_install.bat + packaging/build_wheel.sh + - store_artifacts: + path: dist + - persist_to_workspace: + root: dist + paths: + - "*" + - store_test_results: + path: build_results/ binary_macos_wheel: <<: *binary_common macos: - xcode: "9.0" + xcode: "9.4.1" steps: - checkout_merge + - designate_upload_channel - run: # Cannot easily deduplicate this as source'ing activate # will set environment variables which we need to propagate @@ -227,12 +357,50 @@ jobs: paths: - "*" + binary_ios_build: + <<: *torchvision_ios_params + macos: + xcode: "12.0" + steps: + - attach_workspace: + at: ~/workspace + - checkout + - run_brew_for_ios_build + - run: + name: Build + no_output_timeout: "1h" + command: | + script="/Users/distiller/project/.circleci/unittest/ios/scripts/binary_ios_build.sh" + cat "$script" + source "$script" + - persist_to_workspace: + root: /Users/distiller/workspace/ + paths: ios + + binary_ios_upload: + <<: *torchvision_ios_params + macos: + xcode: "12.0" + steps: + - attach_workspace: + at: ~/workspace + - checkout + - run_brew_for_ios_build + - run: + name: Upload + no_output_timeout: "1h" + command: | + script="/Users/distiller/project/.circleci/unittest/ios/scripts/binary_ios_upload.sh" + cat "$script" + source "$script" + binary_macos_conda: <<: *binary_common macos: - xcode: "9.0" + xcode: "9.4.1" steps: - checkout_merge + - designate_upload_channel - run: command: | curl -o conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh @@ -246,6 +414,8 @@ jobs: root: /Users/distiller/miniconda3/conda-bld/osx-64 paths: - "*" + - store_test_results: + path: build_results/ # Requires org-member context binary_conda_upload: @@ -254,16 +424,13 @@ jobs: steps: - attach_workspace: at: ~/workspace + - designate_upload_channel - run: command: | # Prevent credential from leaking conda install -yq anaconda-client - set +x - anaconda login \ - --username "$PYTORCH_BINARY_PJH5_CONDA_USERNAME" \ - --password "$PYTORCH_BINARY_PJH5_CONDA_PASSWORD" set -x - anaconda upload ~/workspace/*.tar.bz2 -u pytorch-nightly --label main --no-progress --force + anaconda -t "${CONDA_PYTORCHBOT_TOKEN}" upload ~/workspace/*.tar.bz2 -u "pytorch-${UPLOAD_CHANNEL}" --label main --no-progress --force # Requires org-member context binary_wheel_upload: @@ -276,6 +443,7 @@ jobs: steps: - attach_workspace: at: ~/workspace + - designate_upload_channel - checkout - run: command: | @@ -287,953 +455,3540 @@ jobs: export AWS_SECRET_ACCESS_KEY="${PYTORCH_BINARY_AWS_SECRET_ACCESS_KEY}" set -x for pkg in ~/workspace/*.whl; do - aws s3 cp "$pkg" "s3://pytorch/whl/nightly/<< parameters.subfolder >>" --acl public-read + aws s3 cp "$pkg" "s3://pytorch/whl/${UPLOAD_CHANNEL}/<< parameters.subfolder >>" --acl public-read done + smoke_test_linux_conda: + <<: *smoke_test_common + steps: + - attach_workspace: + at: ~/workspace + - designate_upload_channel + - run: + name: install binaries + command: | + set -x + source /usr/local/etc/profile.d/conda.sh && conda activate python${PYTHON_VERSION} + conda install -v -y -c pytorch-nightly pytorch + conda install -v -y $(ls ~/workspace/torchvision*.tar.bz2) + - run: + name: smoke test + command: | + source /usr/local/etc/profile.d/conda.sh && conda activate python${PYTHON_VERSION} + python -c "import torchvision" -workflows: - build: - jobs: - - circleci_consistency - - binary_linux_wheel: - cu_version: cpu - name: binary_linux_wheel_py2.7_cpu - python_version: '2.7' - - binary_linux_wheel: - cu_version: cpu - name: binary_linux_wheel_py2.7u_cpu - python_version: '2.7' - unicode_abi: '1' - - binary_linux_wheel: - cu_version: cu92 - name: binary_linux_wheel_py2.7_cu92 - python_version: '2.7' - wheel_docker_image: soumith/manylinux-cuda92 - - binary_linux_wheel: - cu_version: cu92 - name: binary_linux_wheel_py2.7u_cu92 - python_version: '2.7' - unicode_abi: '1' - wheel_docker_image: soumith/manylinux-cuda92 - - binary_linux_wheel: - cu_version: cu100 - name: binary_linux_wheel_py2.7_cu100 - python_version: '2.7' - wheel_docker_image: soumith/manylinux-cuda100 - - binary_linux_wheel: - cu_version: cu100 - name: binary_linux_wheel_py2.7u_cu100 - python_version: '2.7' - unicode_abi: '1' - wheel_docker_image: soumith/manylinux-cuda100 - - binary_linux_wheel: - cu_version: cu101 - name: binary_linux_wheel_py2.7_cu101 - python_version: '2.7' - - binary_linux_wheel: - cu_version: cu101 - name: binary_linux_wheel_py2.7u_cu101 - python_version: '2.7' - unicode_abi: '1' - - binary_linux_wheel: - cu_version: cpu - name: binary_linux_wheel_py3.5_cpu - python_version: '3.5' - - binary_linux_wheel: - cu_version: cu92 - name: binary_linux_wheel_py3.5_cu92 - python_version: '3.5' - wheel_docker_image: soumith/manylinux-cuda92 - - binary_linux_wheel: - cu_version: cu100 - name: binary_linux_wheel_py3.5_cu100 - python_version: '3.5' - wheel_docker_image: soumith/manylinux-cuda100 - - binary_linux_wheel: - cu_version: cu101 - name: binary_linux_wheel_py3.5_cu101 - python_version: '3.5' - - binary_linux_wheel: - cu_version: cpu - name: binary_linux_wheel_py3.6_cpu - python_version: '3.6' - - binary_linux_wheel: - cu_version: cu92 - name: binary_linux_wheel_py3.6_cu92 - python_version: '3.6' - wheel_docker_image: soumith/manylinux-cuda92 - - binary_linux_wheel: - cu_version: cu100 - name: binary_linux_wheel_py3.6_cu100 - python_version: '3.6' - wheel_docker_image: soumith/manylinux-cuda100 - - binary_linux_wheel: - cu_version: cu101 - name: binary_linux_wheel_py3.6_cu101 - python_version: '3.6' - - binary_linux_wheel: - cu_version: cpu - name: binary_linux_wheel_py3.7_cpu - python_version: '3.7' - - binary_linux_wheel: - cu_version: cu92 - name: binary_linux_wheel_py3.7_cu92 - python_version: '3.7' - wheel_docker_image: soumith/manylinux-cuda92 - - binary_linux_wheel: - cu_version: cu100 - name: binary_linux_wheel_py3.7_cu100 - python_version: '3.7' - wheel_docker_image: soumith/manylinux-cuda100 - - binary_linux_wheel: - cu_version: cu101 - name: binary_linux_wheel_py3.7_cu101 - python_version: '3.7' - - binary_macos_wheel: - cu_version: cpu - name: binary_macos_wheel_py2.7_cpu - python_version: '2.7' - - binary_macos_wheel: - cu_version: cpu - name: binary_macos_wheel_py2.7u_cpu - python_version: '2.7' - unicode_abi: '1' - - binary_macos_wheel: - cu_version: cpu - name: binary_macos_wheel_py3.5_cpu - python_version: '3.5' - - binary_macos_wheel: - cu_version: cpu - name: binary_macos_wheel_py3.6_cpu - python_version: '3.6' - - binary_macos_wheel: - cu_version: cpu - name: binary_macos_wheel_py3.7_cpu - python_version: '3.7' - - binary_linux_conda: - cu_version: cpu - name: binary_linux_conda_py2.7_cpu - python_version: '2.7' - - binary_linux_conda: - cu_version: cu92 - name: binary_linux_conda_py2.7_cu92 - python_version: '2.7' - wheel_docker_image: soumith/manylinux-cuda92 - - binary_linux_conda: - cu_version: cu100 - name: binary_linux_conda_py2.7_cu100 - python_version: '2.7' - wheel_docker_image: soumith/manylinux-cuda100 - - binary_linux_conda: - cu_version: cu101 - name: binary_linux_conda_py2.7_cu101 - python_version: '2.7' - - binary_linux_conda: - cu_version: cpu - name: binary_linux_conda_py3.5_cpu - python_version: '3.5' - - binary_linux_conda: - cu_version: cu92 - name: binary_linux_conda_py3.5_cu92 - python_version: '3.5' - wheel_docker_image: soumith/manylinux-cuda92 - - binary_linux_conda: - cu_version: cu100 - name: binary_linux_conda_py3.5_cu100 - python_version: '3.5' - wheel_docker_image: soumith/manylinux-cuda100 - - binary_linux_conda: - cu_version: cu101 - name: binary_linux_conda_py3.5_cu101 - python_version: '3.5' - - binary_linux_conda: - cu_version: cpu - name: binary_linux_conda_py3.6_cpu - python_version: '3.6' - - binary_linux_conda: - cu_version: cu92 - name: binary_linux_conda_py3.6_cu92 - python_version: '3.6' - wheel_docker_image: soumith/manylinux-cuda92 - - binary_linux_conda: - cu_version: cu100 - name: binary_linux_conda_py3.6_cu100 - python_version: '3.6' - wheel_docker_image: soumith/manylinux-cuda100 - - binary_linux_conda: - cu_version: cu101 - name: binary_linux_conda_py3.6_cu101 - python_version: '3.6' - - binary_linux_conda: - cu_version: cpu - name: binary_linux_conda_py3.7_cpu - python_version: '3.7' - - binary_linux_conda: - cu_version: cu92 - name: binary_linux_conda_py3.7_cu92 - python_version: '3.7' - wheel_docker_image: soumith/manylinux-cuda92 - - binary_linux_conda: - cu_version: cu100 - name: binary_linux_conda_py3.7_cu100 - python_version: '3.7' - wheel_docker_image: soumith/manylinux-cuda100 - - binary_linux_conda: - cu_version: cu101 - name: binary_linux_conda_py3.7_cu101 - python_version: '3.7' - - binary_macos_conda: - cu_version: cpu - name: binary_macos_conda_py2.7_cpu - python_version: '2.7' - - binary_macos_conda: - cu_version: cpu - name: binary_macos_conda_py3.5_cpu - python_version: '3.5' - - binary_macos_conda: - cu_version: cpu - name: binary_macos_conda_py3.6_cpu - python_version: '3.6' - - binary_macos_conda: - cu_version: cpu - name: binary_macos_conda_py3.7_cpu - python_version: '3.7' - - binary_linux_conda_cuda: - name: torchvision_linux_py3.7_cu100 - python_version: "3.7" - cu_version: "cu100" - - binary_win_conda: - name: torchvision_win_py3.6_cpu - python_version: "3.6" - cu_version: "cpu" - - binary_win_conda_cuda: - name: torchvision_win_py3.6_cu101 - python_version: "3.6" - cu_version: "cu101" + smoke_test_linux_pip: + <<: *smoke_test_common + steps: + - attach_workspace: + at: ~/workspace + - designate_upload_channel + - run: + name: install binaries + command: | + set -x + source /usr/local/etc/profile.d/conda.sh && conda activate python${PYTHON_VERSION} + pip install $(ls ~/workspace/torchvision*.whl) --pre -f https://download.pytorch.org/whl/nightly/torch_nightly.html + - run: + name: smoke test + command: | + source /usr/local/etc/profile.d/conda.sh && conda activate python${PYTHON_VERSION} + python -c "import torchvision" - nightly: + smoke_test_docker_image_build: + machine: + image: ubuntu-1604:201903-01 + resource_class: large + environment: + image_name: torchvision/smoke_test + steps: + - checkout + - designate_upload_channel + - run: + name: Build and push Docker image + no_output_timeout: "1h" + command: | + set +x + echo "${DOCKER_HUB_TOKEN}" | docker login --username "${DOCKER_HUB_USERNAME}" --password-stdin + set -x + cd .circleci/smoke_test/docker && docker build . -t ${image_name}:${CIRCLE_WORKFLOW_ID} + docker tag ${image_name}:${CIRCLE_WORKFLOW_ID} ${image_name}:latest + docker push ${image_name}:${CIRCLE_WORKFLOW_ID} + docker push ${image_name}:latest + + smoke_test_win_conda: + <<: *binary_common + executor: + name: windows-cpu + steps: + - attach_workspace: + at: ~/workspace + - designate_upload_channel + - run: + name: install binaries + command: | + set -x + eval "$('/C/tools/miniconda3/Scripts/conda.exe' 'shell.bash' 'hook')" + conda env remove -n python${PYTHON_VERSION} || true + CONDA_CHANNEL_FLAGS="" + if [[ "${PYTHON_VERSION}" = 3.9 ]]; then + CONDA_CHANNEL_FLAGS="-c=conda-forge" + fi + conda create ${CONDA_CHANNEL_FLAGS} -yn python${PYTHON_VERSION} python=${PYTHON_VERSION} + conda activate python${PYTHON_VERSION} + conda install Pillow + conda install -v -y -c pytorch-nightly pytorch + conda install -v -y $(ls ~/workspace/torchvision*.tar.bz2) + - run: + name: smoke test + command: | + eval "$('/C/tools/miniconda3/Scripts/conda.exe' 'shell.bash' 'hook')" + conda activate python${PYTHON_VERSION} + python -c "import torchvision" + + smoke_test_win_pip: + <<: *binary_common + executor: + name: windows-cpu + steps: + - attach_workspace: + at: ~/workspace + - designate_upload_channel + - run: + name: install binaries + command: | + set -x + eval "$('/C/tools/miniconda3/Scripts/conda.exe' 'shell.bash' 'hook')" + CONDA_CHANNEL_FLAGS="" + if [[ "${PYTHON_VERSION}" = 3.9 ]]; then + CONDA_CHANNEL_FLAGS="-c=conda-forge" + fi + conda create ${CONDA_CHANNEL_FLAGS} -yn python${PYTHON_VERSION} python=${PYTHON_VERSION} + conda create -yn python${PYTHON_VERSION} python=${PYTHON_VERSION} + conda activate python${PYTHON_VERSION} + pip install $(ls ~/workspace/torchvision*.whl) --pre -f https://download.pytorch.org/whl/nightly/torch_nightly.html + - run: + name: smoke test + command: | + eval "$('/C/tools/miniconda3/Scripts/conda.exe' 'shell.bash' 'hook')" + conda activate python${PYTHON_VERSION} + python -c "import torchvision" + + unittest_linux_cpu: + <<: *binary_common + docker: + - image: "pytorch/manylinux-cuda102" + resource_class: 2xlarge+ + steps: + - checkout + - designate_upload_channel + - run: + name: Generate cache key + # This will refresh cache on Sundays, nightly build should generate new cache. + command: echo "$(date +"%Y-%U")" > .circleci-weekly + - restore_cache: + + keys: + - env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + - run: + name: Setup + command: .circleci/unittest/linux/scripts/setup_env.sh + - save_cache: + + key: env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + paths: + - conda + - env + - run: + name: Install torchvision + command: .circleci/unittest/linux/scripts/install.sh + - run: + name: Run tests + command: .circleci/unittest/linux/scripts/run_test.sh + - run: + name: Post process + command: .circleci/unittest/linux/scripts/post_process.sh + - store_test_results: + path: test-results + + unittest_linux_gpu: + <<: *binary_common + machine: + image: ubuntu-1604-cuda-10.1:201909-23 + resource_class: gpu.small + environment: + image_name: "pytorch/manylinux-cuda101" + PYTHON_VERSION: << parameters.python_version >> + steps: + - checkout + - designate_upload_channel + - run: + name: Generate cache key + # This will refresh cache on Sundays, nightly build should generate new cache. + command: echo "$(date +"%Y-%U")" > .circleci-weekly + - restore_cache: + + keys: + - env-v3-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + - run: + name: Setup + command: docker run -e PYTHON_VERSION -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/setup_env.sh + - save_cache: + + key: env-v3-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + paths: + - conda + - env + - run: + name: Install torchvision + command: docker run -t --gpus all -v $PWD:$PWD -w $PWD -e UPLOAD_CHANNEL -e CU_VERSION "${image_name}" .circleci/unittest/linux/scripts/install.sh + - run: + name: Run tests + command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/run_test.sh + - run: + name: Post Process + command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/post_process.sh + - store_test_results: + path: test-results + + unittest_windows_cpu: + <<: *binary_common + executor: + name: windows-cpu + steps: + - checkout + - designate_upload_channel + - install_cuda_compatible_cmath + - run: + name: Generate cache key + # This will refresh cache on Sundays, nightly build should generate new cache. + command: echo "$(date +"%Y-%U")" > .circleci-weekly + - restore_cache: + + keys: + - env-v2-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + - run: + name: Setup + command: .circleci/unittest/windows/scripts/setup_env.sh + - save_cache: + + key: env-v2-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + paths: + - conda + - env + - run: + name: Install torchvision + command: .circleci/unittest/windows/scripts/install.sh + - run: + name: Run tests + command: .circleci/unittest/windows/scripts/run_test.sh + - run: + name: Post process + command: .circleci/unittest/windows/scripts/post_process.sh + - store_test_results: + path: test-results + + unittest_windows_gpu: + <<: *binary_common + executor: + name: windows-gpu + environment: + CUDA_VERSION: "10.1" + PYTHON_VERSION: << parameters.python_version >> + steps: + - checkout + - designate_upload_channel + - install_cuda_compatible_cmath + - run: + name: Generate cache key + # This will refresh cache on Sundays, nightly build should generate new cache. + command: echo "$(date +"%Y-%U")" > .circleci-weekly + - restore_cache: + + keys: + - env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + - run: + name: Setup + command: .circleci/unittest/windows/scripts/setup_env.sh + - save_cache: + + key: env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + paths: + - conda + - env + - run: + name: Install torchvision + command: .circleci/unittest/windows/scripts/install.sh + - run: + name: Run tests + command: .circleci/unittest/windows/scripts/run_test.sh + - run: + name: Post process + command: .circleci/unittest/windows/scripts/post_process.sh + - store_test_results: + path: test-results + + unittest_macos_cpu: + <<: *binary_common + macos: + xcode: "9.4.1" + resource_class: large + steps: + - checkout + - designate_upload_channel + - run: + name: Install wget + command: HOMEBREW_NO_AUTO_UPDATE=1 brew install wget + # Disable brew auto update which is very slow + - run: + name: Generate cache key + # This will refresh cache on Sundays, nightly build should generate new cache. + command: echo "$(date +"%Y-%U")" > .circleci-weekly + - restore_cache: + + keys: + - env-v3-macos-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + - run: + name: Setup + command: .circleci/unittest/linux/scripts/setup_env.sh + - save_cache: + + key: env-v3-macos-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + paths: + - conda + - env + - run: + name: Install torchvision + command: .circleci/unittest/linux/scripts/install.sh + - run: + name: Run tests + command: .circleci/unittest/linux/scripts/run_test.sh + - run: + name: Post process + command: .circleci/unittest/linux/scripts/post_process.sh + - store_test_results: + path: test-results + + cmake_linux_cpu: + <<: *binary_common + docker: + - image: "pytorch/manylinux-cuda102" + resource_class: 2xlarge+ + steps: + - checkout_merge + - designate_upload_channel + - run: + name: Setup conda + command: .circleci/unittest/linux/scripts/setup_env.sh + - run: packaging/build_cmake.sh + + cmake_linux_gpu: + <<: *binary_common + machine: + image: ubuntu-1604-cuda-10.1:201909-23 + resource_class: gpu.small + environment: + PYTHON_VERSION: << parameters.python_version >> + PYTORCH_VERSION: << parameters.pytorch_version >> + UNICODE_ABI: << parameters.unicode_abi >> + CU_VERSION: << parameters.cu_version >> + steps: + - checkout_merge + - designate_upload_channel + - run: + name: Setup conda + command: docker run -e CU_VERSION -e PYTHON_VERSION -e UNICODE_ABI -e PYTORCH_VERSION -t --gpus all -v $PWD:$PWD -w $PWD << parameters.wheel_docker_image >> .circleci/unittest/linux/scripts/setup_env.sh + - run: + name: Build torchvision C++ distribution and test + command: docker run -e CU_VERSION -e PYTHON_VERSION -e UNICODE_ABI -e PYTORCH_VERSION -e UPLOAD_CHANNEL -t --gpus all -v $PWD:$PWD -w $PWD << parameters.wheel_docker_image >> packaging/build_cmake.sh + + cmake_macos_cpu: + <<: *binary_common + macos: + xcode: "9.4.1" + steps: + - checkout_merge + - designate_upload_channel + - run: + command: | + curl -o conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh + sh conda.sh -b + source $HOME/miniconda3/bin/activate + conda install -yq conda-build cmake + packaging/build_cmake.sh + + cmake_windows_cpu: + <<: *binary_common + executor: + name: windows-cpu + steps: + - checkout_merge + - designate_upload_channel + - install_cuda_compatible_cmath + - run: + command: | + set -ex + source packaging/windows/internal/vc_install_helper.sh + packaging/build_cmake.sh + + cmake_windows_gpu: + <<: *binary_common + executor: + name: windows-gpu + steps: + - checkout_merge + - designate_upload_channel + - install_cuda_compatible_cmath + - run: + command: | + set -ex + source packaging/windows/internal/vc_install_helper.sh + packaging/windows/internal/cuda_install.bat + packaging/build_cmake.sh + + build_docs: + <<: *binary_common + docker: + - image: "pytorch/manylinux-cuda100" + resource_class: 2xlarge+ + steps: + - attach_workspace: + at: ~/workspace + - checkout + - run: + name: Setup + command: .circleci/unittest/linux/scripts/setup_env.sh + - designate_upload_channel + - run: + name: Install torchvision + command: .circleci/unittest/linux/scripts/install.sh + - run: + name: Build docs + command: | + set -ex + tag=${CIRCLE_TAG:1:5} + VERSION=${tag:-master} + eval "$(./conda/bin/conda shell.bash hook)" + conda activate ./env + pushd docs + pip install -r requirements.txt + make html + popd + - persist_to_workspace: + root: ./ + paths: + - "*" + - store_artifacts: + path: ./docs/build/html + destination: docs + + upload_docs: + <<: *binary_common + docker: + - image: "pytorch/manylinux-cuda100" + resource_class: 2xlarge+ + steps: + - attach_workspace: + at: ~/workspace + - run: + name: Generate netrc + command: | + # set credentials for https pushing + # requires the org-member context + cat > ~/.netrc \<> ${BASH_ENV} + install_cuda_compatible_cmath: + description: "Install CUDA compatible cmath" + steps: + - run: + name: _HACK_ Install CUDA compatible cmath + no_output_timeout: 1m + command: | + powershell .circleci/scripts/vs_install_cmath.ps1 + + brew_update: + description: "Update Homebrew and install base formulae" + steps: + - run: + name: Update Homebrew + no_output_timeout: "10m" + command: | + set -ex + + # Update repositories manually. + # Running `brew update` produces a comparison between the + # current checkout and the updated checkout, which takes a + # very long time because the existing checkout is 2y old. + for path in $(find /usr/local/Homebrew -type d -name .git) + do + cd $path/.. + git fetch --depth=1 origin + git reset --hard origin/master + done + + export HOMEBREW_NO_AUTO_UPDATE=1 + + # Install expect and moreutils so that we can call `unbuffer` and `ts`. + # moreutils installs a `parallel` executable by default, which conflicts + # with the executable from the GNU `parallel`, so we must unlink GNU + # `parallel` first, and relink it afterwards. + brew install coreutils + brew unlink parallel + brew install moreutils + brew link parallel --overwrite + brew install expect + + brew_install: + description: "Install Homebrew formulae" + parameters: + formulae: + type: string + default: "" + steps: + - run: + name: Install << parameters.formulae >> + no_output_timeout: "10m" + command: | + set -ex + export HOMEBREW_NO_AUTO_UPDATE=1 + brew install << parameters.formulae >> + + run_brew_for_ios_build: + steps: + - brew_update + - brew_install: + formulae: libtool binary_common: &binary_common parameters: - # Edit these defaults to do a release` + # Edit these defaults to do a release build_version: description: "version number of release binary; by default, build a nightly" type: string @@ -49,6 +125,7 @@ binary_common: &binary_common cu_version: description: "CUDA version to build against, in CU format (e.g., cpu or cu100)" type: string + default: "cpu" unicode_abi: description: "Python 2.7 wheel only: whether or not we are cp27mu (default: no)" type: string @@ -56,14 +133,38 @@ binary_common: &binary_common wheel_docker_image: description: "Wheel only: what docker image to use" type: string - default: "soumith/manylinux-cuda101" + default: "pytorch/manylinux-cuda101" + conda_docker_image: + description: "Conda only: what docker image to use" + type: string + default: "pytorch/conda-builder:cpu" environment: PYTHON_VERSION: << parameters.python_version >> - BUILD_VERSION: << parameters.build_version >> PYTORCH_VERSION: << parameters.pytorch_version >> UNICODE_ABI: << parameters.unicode_abi >> CU_VERSION: << parameters.cu_version >> +torchvision_ios_params: &torchvision_ios_params + parameters: + build_environment: + type: string + default: "" + ios_arch: + type: string + default: "" + ios_platform: + type: string + default: "" + environment: + BUILD_ENVIRONMENT: << parameters.build_environment >> + IOS_ARCH: << parameters.ios_arch >> + IOS_PLATFORM: << parameters.ios_platform >> + +smoke_test_common: &smoke_test_common + <<: *binary_common + docker: + - image: torchvision/smoke_test:latest + jobs: circleci_consistency: docker: @@ -76,6 +177,78 @@ jobs: python .circleci/regenerate.py git diff --exit-code || (echo ".circleci/config.yml not in sync with config.yml.in! Run .circleci/regenerate.py to update config"; exit 1) + python_lint: + docker: + - image: circleci/python:3.7 + steps: + - checkout + - run: + command: | + pip install --user --progress-bar off flake8 typing + flake8 --config=setup.cfg . + + python_type_check: + docker: + - image: circleci/python:3.7 + steps: + - checkout + - run: + command: | + sudo apt-get update -y + sudo apt install -y libturbojpeg-dev + pip install --user --progress-bar off mypy + pip install --user --progress-bar off --pre torch -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html + pip install --user --progress-bar off --editable . + mypy --config-file mypy.ini + + docstring_parameters_sync: + docker: + - image: circleci/python:3.7 + steps: + - checkout + - run: + command: | + pip install --user pydocstyle + pydocstyle + + clang_format: + docker: + - image: circleci/python:3.7 + steps: + - checkout + - run: + command: | + curl https://oss-clang-format.s3.us-east-2.amazonaws.com/linux64/clang-format-linux64 -o clang-format + chmod +x clang-format + sudo mv clang-format /opt/clang-format + ./.circleci/unittest/linux/scripts/run-clang-format.py -r torchvision/csrc --clang-format-executable /opt/clang-format + + torchhub_test: + docker: + - image: circleci/python:3.7 + steps: + - checkout + - run: + command: | + pip install --user --progress-bar off --pre torch -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html + # need to install torchvision dependencies due to transitive imports + pip install --user --progress-bar off --editable . + python test/test_hub.py + + torch_onnx_test: + docker: + - image: circleci/python:3.7 + steps: + - checkout + - run: + command: | + pip install --user --progress-bar off --pre torch -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html + # need to install torchvision dependencies due to transitive imports + pip install --user --progress-bar off --editable . + pip install --user onnx + pip install --user onnxruntime + python test/test_onnx.py + binary_linux_wheel: <<: *binary_common docker: @@ -83,6 +256,7 @@ jobs: resource_class: 2xlarge+ steps: - checkout_merge + - designate_upload_channel - run: packaging/build_wheel.sh - store_artifacts: path: dist @@ -94,10 +268,11 @@ jobs: binary_linux_conda: <<: *binary_common docker: - - image: "soumith/conda-cuda" + - image: "<< parameters.conda_docker_image >>" resource_class: 2xlarge+ steps: - checkout_merge + - designate_upload_channel - run: packaging/build_conda.sh - store_artifacts: path: /opt/conda/conda-bld/linux-64 @@ -105,112 +280,67 @@ jobs: root: /opt/conda/conda-bld/linux-64 paths: - "*" - - binary_linux_conda_cuda: - <<: *binary_common - machine: - image: ubuntu-1604:201903-01 - resource_class: gpu.medium - steps: - - checkout_merge - - run: - name: Setup environment - command: | - set -e - - curl -L https://packagecloud.io/circleci/trusty/gpgkey | sudo apt-key add - - curl -L https://dl.google.com/linux/linux_signing_key.pub | sudo apt-key add - - - sudo apt-get update - - sudo apt-get install \ - apt-transport-https \ - ca-certificates \ - curl \ - gnupg-agent \ - software-properties-common - - curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - - - sudo add-apt-repository \ - "deb [arch=amd64] https://download.docker.com/linux/ubuntu \ - $(lsb_release -cs) \ - stable" - - sudo apt-get update - export DOCKER_VERSION="5:19.03.2~3-0~ubuntu-xenial" - sudo apt-get install docker-ce=${DOCKER_VERSION} docker-ce-cli=${DOCKER_VERSION} containerd.io=1.2.6-3 - - # Add the package repositories - distribution=$(. /etc/os-release;echo $ID$VERSION_ID) - curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - - curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list - - export NVIDIA_CONTAINER_VERSION="1.0.3-1" - sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit=${NVIDIA_CONTAINER_VERSION} - sudo systemctl restart docker - - DRIVER_FN="NVIDIA-Linux-x86_64-410.104.run" - wget "https://s3.amazonaws.com/ossci-linux/nvidia_driver/$DRIVER_FN" - sudo /bin/bash "$DRIVER_FN" -s --no-drm || (sudo cat /var/log/nvidia-installer.log && false) - nvidia-smi - - - run: - name: Pull docker image - command: | - set -e - export DOCKER_IMAGE=soumith/conda-cuda - echo Pulling docker image $DOCKER_IMAGE - docker pull $DOCKER_IMAGE >/dev/null - - - run: - name: Build and run tests - command: | - set -e - - cd ${HOME}/project/ - - export DOCKER_IMAGE=soumith/conda-cuda - export VARS_TO_PASS="-e PYTHON_VERSION -e BUILD_VERSION -e PYTORCH_VERSION -e UNICODE_ABI -e CU_VERSION" - - docker run --gpus all --ipc=host -v $(pwd):/remote -w /remote ${VARS_TO_PASS} ${DOCKER_IMAGE} ./packaging/build_conda.sh + - store_test_results: + path: build_results/ binary_win_conda: <<: *binary_common - executor: - name: win/default - shell: bash.exe + executor: windows-cpu steps: - checkout_merge + - designate_upload_channel + - install_cuda_compatible_cmath - run: + name: Build conda packages + no_output_timeout: 20m command: | - choco install miniconda3 - (& "C:\tools\miniconda3\Scripts\conda.exe" "shell.powershell" "hook") | Out-String | Invoke-Expression + set -ex + source packaging/windows/internal/vc_install_helper.sh + packaging/windows/internal/cuda_install.bat + eval "$('/C/tools/miniconda3/Scripts/conda.exe' 'shell.bash' 'hook')" conda activate base conda install -yq conda-build "conda-package-handling!=1.5.0" - bash packaging/build_conda.sh - shell: powershell.exe + packaging/build_conda.sh + rm /C/tools/miniconda3/conda-bld/win-64/vs${VC_YEAR}*.tar.bz2 + - store_artifacts: + path: C:/tools/miniconda3/conda-bld/win-64 + - persist_to_workspace: + root: C:/tools/miniconda3/conda-bld/win-64 + paths: + - "*" + - store_test_results: + path: build_results/ - binary_win_conda_cuda: + binary_win_wheel: <<: *binary_common - executor: windows-gpu-prototype + executor: windows-cpu steps: - checkout_merge + - designate_upload_channel + - install_cuda_compatible_cmath - run: + name: Build wheel packages command: | - choco install miniconda3 - (& "C:\tools\miniconda3\Scripts\conda.exe" "shell.powershell" "hook") | Out-String | Invoke-Expression - conda activate base - conda install -yq conda-build "conda-package-handling!=1.5.0" - bash packaging/build_conda.sh - shell: powershell.exe + set -ex + source packaging/windows/internal/vc_install_helper.sh + packaging/windows/internal/cuda_install.bat + packaging/build_wheel.sh + - store_artifacts: + path: dist + - persist_to_workspace: + root: dist + paths: + - "*" + - store_test_results: + path: build_results/ binary_macos_wheel: <<: *binary_common macos: - xcode: "9.0" + xcode: "9.4.1" steps: - checkout_merge + - designate_upload_channel - run: # Cannot easily deduplicate this as source'ing activate # will set environment variables which we need to propagate @@ -227,12 +357,50 @@ jobs: paths: - "*" + binary_ios_build: + <<: *torchvision_ios_params + macos: + xcode: "12.0" + steps: + - attach_workspace: + at: ~/workspace + - checkout + - run_brew_for_ios_build + - run: + name: Build + no_output_timeout: "1h" + command: | + script="/Users/distiller/project/.circleci/unittest/ios/scripts/binary_ios_build.sh" + cat "$script" + source "$script" + - persist_to_workspace: + root: /Users/distiller/workspace/ + paths: ios + + binary_ios_upload: + <<: *torchvision_ios_params + macos: + xcode: "12.0" + steps: + - attach_workspace: + at: ~/workspace + - checkout + - run_brew_for_ios_build + - run: + name: Upload + no_output_timeout: "1h" + command: | + script="/Users/distiller/project/.circleci/unittest/ios/scripts/binary_ios_upload.sh" + cat "$script" + source "$script" + binary_macos_conda: <<: *binary_common macos: - xcode: "9.0" + xcode: "9.4.1" steps: - checkout_merge + - designate_upload_channel - run: command: | curl -o conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh @@ -246,6 +414,8 @@ jobs: root: /Users/distiller/miniconda3/conda-bld/osx-64 paths: - "*" + - store_test_results: + path: build_results/ # Requires org-member context binary_conda_upload: @@ -254,16 +424,13 @@ jobs: steps: - attach_workspace: at: ~/workspace + - designate_upload_channel - run: command: | # Prevent credential from leaking conda install -yq anaconda-client - set +x - anaconda login \ - --username "$PYTORCH_BINARY_PJH5_CONDA_USERNAME" \ - --password "$PYTORCH_BINARY_PJH5_CONDA_PASSWORD" set -x - anaconda upload ~/workspace/*.tar.bz2 -u pytorch-nightly --label main --no-progress --force + anaconda -t "${CONDA_PYTORCHBOT_TOKEN}" upload ~/workspace/*.tar.bz2 -u "pytorch-${UPLOAD_CHANNEL}" --label main --no-progress --force # Requires org-member context binary_wheel_upload: @@ -276,6 +443,7 @@ jobs: steps: - attach_workspace: at: ~/workspace + - designate_upload_channel - checkout - run: command: | @@ -287,31 +455,514 @@ jobs: export AWS_SECRET_ACCESS_KEY="${PYTORCH_BINARY_AWS_SECRET_ACCESS_KEY}" set -x for pkg in ~/workspace/*.whl; do - aws s3 cp "$pkg" "s3://pytorch/whl/nightly/<< parameters.subfolder >>" --acl public-read + aws s3 cp "$pkg" "s3://pytorch/whl/${UPLOAD_CHANNEL}/<< parameters.subfolder >>" --acl public-read done + smoke_test_linux_conda: + <<: *smoke_test_common + steps: + - attach_workspace: + at: ~/workspace + - designate_upload_channel + - run: + name: install binaries + command: | + set -x + source /usr/local/etc/profile.d/conda.sh && conda activate python${PYTHON_VERSION} + conda install -v -y -c pytorch-nightly pytorch + conda install -v -y $(ls ~/workspace/torchvision*.tar.bz2) + - run: + name: smoke test + command: | + source /usr/local/etc/profile.d/conda.sh && conda activate python${PYTHON_VERSION} + python -c "import torchvision" + + smoke_test_linux_pip: + <<: *smoke_test_common + steps: + - attach_workspace: + at: ~/workspace + - designate_upload_channel + - run: + name: install binaries + command: | + set -x + source /usr/local/etc/profile.d/conda.sh && conda activate python${PYTHON_VERSION} + pip install $(ls ~/workspace/torchvision*.whl) --pre -f https://download.pytorch.org/whl/nightly/torch_nightly.html + - run: + name: smoke test + command: | + source /usr/local/etc/profile.d/conda.sh && conda activate python${PYTHON_VERSION} + python -c "import torchvision" + + smoke_test_docker_image_build: + machine: + image: ubuntu-1604:201903-01 + resource_class: large + environment: + image_name: torchvision/smoke_test + steps: + - checkout + - designate_upload_channel + - run: + name: Build and push Docker image + no_output_timeout: "1h" + command: | + set +x + echo "${DOCKER_HUB_TOKEN}" | docker login --username "${DOCKER_HUB_USERNAME}" --password-stdin + set -x + cd .circleci/smoke_test/docker && docker build . -t ${image_name}:${CIRCLE_WORKFLOW_ID} + docker tag ${image_name}:${CIRCLE_WORKFLOW_ID} ${image_name}:latest + docker push ${image_name}:${CIRCLE_WORKFLOW_ID} + docker push ${image_name}:latest + + smoke_test_win_conda: + <<: *binary_common + executor: + name: windows-cpu + steps: + - attach_workspace: + at: ~/workspace + - designate_upload_channel + - run: + name: install binaries + command: | + set -x + eval "$('/C/tools/miniconda3/Scripts/conda.exe' 'shell.bash' 'hook')" + conda env remove -n python${PYTHON_VERSION} || true + CONDA_CHANNEL_FLAGS="" + if [[ "${PYTHON_VERSION}" = 3.9 ]]; then + CONDA_CHANNEL_FLAGS="-c=conda-forge" + fi + conda create ${CONDA_CHANNEL_FLAGS} -yn python${PYTHON_VERSION} python=${PYTHON_VERSION} + conda activate python${PYTHON_VERSION} + conda install Pillow + conda install -v -y -c pytorch-nightly pytorch + conda install -v -y $(ls ~/workspace/torchvision*.tar.bz2) + - run: + name: smoke test + command: | + eval "$('/C/tools/miniconda3/Scripts/conda.exe' 'shell.bash' 'hook')" + conda activate python${PYTHON_VERSION} + python -c "import torchvision" + + smoke_test_win_pip: + <<: *binary_common + executor: + name: windows-cpu + steps: + - attach_workspace: + at: ~/workspace + - designate_upload_channel + - run: + name: install binaries + command: | + set -x + eval "$('/C/tools/miniconda3/Scripts/conda.exe' 'shell.bash' 'hook')" + CONDA_CHANNEL_FLAGS="" + if [[ "${PYTHON_VERSION}" = 3.9 ]]; then + CONDA_CHANNEL_FLAGS="-c=conda-forge" + fi + conda create ${CONDA_CHANNEL_FLAGS} -yn python${PYTHON_VERSION} python=${PYTHON_VERSION} + conda create -yn python${PYTHON_VERSION} python=${PYTHON_VERSION} + conda activate python${PYTHON_VERSION} + pip install $(ls ~/workspace/torchvision*.whl) --pre -f https://download.pytorch.org/whl/nightly/torch_nightly.html + - run: + name: smoke test + command: | + eval "$('/C/tools/miniconda3/Scripts/conda.exe' 'shell.bash' 'hook')" + conda activate python${PYTHON_VERSION} + python -c "import torchvision" + + unittest_linux_cpu: + <<: *binary_common + docker: + - image: "pytorch/manylinux-cuda102" + resource_class: 2xlarge+ + steps: + - checkout + - designate_upload_channel + - run: + name: Generate cache key + # This will refresh cache on Sundays, nightly build should generate new cache. + command: echo "$(date +"%Y-%U")" > .circleci-weekly + - restore_cache: + {% raw %} + keys: + - env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + {% endraw %} + - run: + name: Setup + command: .circleci/unittest/linux/scripts/setup_env.sh + - save_cache: + {% raw %} + key: env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + {% endraw %} + paths: + - conda + - env + - run: + name: Install torchvision + command: .circleci/unittest/linux/scripts/install.sh + - run: + name: Run tests + command: .circleci/unittest/linux/scripts/run_test.sh + - run: + name: Post process + command: .circleci/unittest/linux/scripts/post_process.sh + - store_test_results: + path: test-results + + unittest_linux_gpu: + <<: *binary_common + machine: + image: ubuntu-1604-cuda-10.1:201909-23 + resource_class: gpu.small + environment: + image_name: "pytorch/manylinux-cuda101" + PYTHON_VERSION: << parameters.python_version >> + steps: + - checkout + - designate_upload_channel + - run: + name: Generate cache key + # This will refresh cache on Sundays, nightly build should generate new cache. + command: echo "$(date +"%Y-%U")" > .circleci-weekly + - restore_cache: + {% raw %} + keys: + - env-v3-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + {% endraw %} + - run: + name: Setup + command: docker run -e PYTHON_VERSION -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/setup_env.sh + - save_cache: + {% raw %} + key: env-v3-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + {% endraw %} + paths: + - conda + - env + - run: + name: Install torchvision + command: docker run -t --gpus all -v $PWD:$PWD -w $PWD -e UPLOAD_CHANNEL -e CU_VERSION "${image_name}" .circleci/unittest/linux/scripts/install.sh + - run: + name: Run tests + command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/run_test.sh + - run: + name: Post Process + command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/post_process.sh + - store_test_results: + path: test-results + + unittest_windows_cpu: + <<: *binary_common + executor: + name: windows-cpu + steps: + - checkout + - designate_upload_channel + - install_cuda_compatible_cmath + - run: + name: Generate cache key + # This will refresh cache on Sundays, nightly build should generate new cache. + command: echo "$(date +"%Y-%U")" > .circleci-weekly + - restore_cache: + {% raw %} + keys: + - env-v2-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + {% endraw %} + - run: + name: Setup + command: .circleci/unittest/windows/scripts/setup_env.sh + - save_cache: + {% raw %} + key: env-v2-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + {% endraw %} + paths: + - conda + - env + - run: + name: Install torchvision + command: .circleci/unittest/windows/scripts/install.sh + - run: + name: Run tests + command: .circleci/unittest/windows/scripts/run_test.sh + - run: + name: Post process + command: .circleci/unittest/windows/scripts/post_process.sh + - store_test_results: + path: test-results + + unittest_windows_gpu: + <<: *binary_common + executor: + name: windows-gpu + environment: + CUDA_VERSION: "10.1" + PYTHON_VERSION: << parameters.python_version >> + steps: + - checkout + - designate_upload_channel + - install_cuda_compatible_cmath + - run: + name: Generate cache key + # This will refresh cache on Sundays, nightly build should generate new cache. + command: echo "$(date +"%Y-%U")" > .circleci-weekly + - restore_cache: + {% raw %} + keys: + - env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + {% endraw %} + - run: + name: Setup + command: .circleci/unittest/windows/scripts/setup_env.sh + - save_cache: + {% raw %} + key: env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + {% endraw %} + paths: + - conda + - env + - run: + name: Install torchvision + command: .circleci/unittest/windows/scripts/install.sh + - run: + name: Run tests + command: .circleci/unittest/windows/scripts/run_test.sh + - run: + name: Post process + command: .circleci/unittest/windows/scripts/post_process.sh + - store_test_results: + path: test-results + + unittest_macos_cpu: + <<: *binary_common + macos: + xcode: "9.4.1" + resource_class: large + steps: + - checkout + - designate_upload_channel + - run: + name: Install wget + command: HOMEBREW_NO_AUTO_UPDATE=1 brew install wget + # Disable brew auto update which is very slow + - run: + name: Generate cache key + # This will refresh cache on Sundays, nightly build should generate new cache. + command: echo "$(date +"%Y-%U")" > .circleci-weekly + - restore_cache: + {% raw %} + keys: + - env-v3-macos-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + {% endraw %} + - run: + name: Setup + command: .circleci/unittest/linux/scripts/setup_env.sh + - save_cache: + {% raw %} + key: env-v3-macos-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + {% endraw %} + paths: + - conda + - env + - run: + name: Install torchvision + command: .circleci/unittest/linux/scripts/install.sh + - run: + name: Run tests + command: .circleci/unittest/linux/scripts/run_test.sh + - run: + name: Post process + command: .circleci/unittest/linux/scripts/post_process.sh + - store_test_results: + path: test-results + + cmake_linux_cpu: + <<: *binary_common + docker: + - image: "pytorch/manylinux-cuda102" + resource_class: 2xlarge+ + steps: + - checkout_merge + - designate_upload_channel + - run: + name: Setup conda + command: .circleci/unittest/linux/scripts/setup_env.sh + - run: packaging/build_cmake.sh + + cmake_linux_gpu: + <<: *binary_common + machine: + image: ubuntu-1604-cuda-10.1:201909-23 + resource_class: gpu.small + environment: + PYTHON_VERSION: << parameters.python_version >> + PYTORCH_VERSION: << parameters.pytorch_version >> + UNICODE_ABI: << parameters.unicode_abi >> + CU_VERSION: << parameters.cu_version >> + steps: + - checkout_merge + - designate_upload_channel + - run: + name: Setup conda + command: docker run -e CU_VERSION -e PYTHON_VERSION -e UNICODE_ABI -e PYTORCH_VERSION -t --gpus all -v $PWD:$PWD -w $PWD << parameters.wheel_docker_image >> .circleci/unittest/linux/scripts/setup_env.sh + - run: + name: Build torchvision C++ distribution and test + command: docker run -e CU_VERSION -e PYTHON_VERSION -e UNICODE_ABI -e PYTORCH_VERSION -e UPLOAD_CHANNEL -t --gpus all -v $PWD:$PWD -w $PWD << parameters.wheel_docker_image >> packaging/build_cmake.sh + + cmake_macos_cpu: + <<: *binary_common + macos: + xcode: "9.4.1" + steps: + - checkout_merge + - designate_upload_channel + - run: + command: | + curl -o conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh + sh conda.sh -b + source $HOME/miniconda3/bin/activate + conda install -yq conda-build cmake + packaging/build_cmake.sh + + cmake_windows_cpu: + <<: *binary_common + executor: + name: windows-cpu + steps: + - checkout_merge + - designate_upload_channel + - install_cuda_compatible_cmath + - run: + command: | + set -ex + source packaging/windows/internal/vc_install_helper.sh + packaging/build_cmake.sh + + cmake_windows_gpu: + <<: *binary_common + executor: + name: windows-gpu + steps: + - checkout_merge + - designate_upload_channel + - install_cuda_compatible_cmath + - run: + command: | + set -ex + source packaging/windows/internal/vc_install_helper.sh + packaging/windows/internal/cuda_install.bat + packaging/build_cmake.sh + + build_docs: + <<: *binary_common + docker: + - image: "pytorch/manylinux-cuda100" + resource_class: 2xlarge+ + steps: + - attach_workspace: + at: ~/workspace + - checkout + - run: + name: Setup + command: .circleci/unittest/linux/scripts/setup_env.sh + - designate_upload_channel + - run: + name: Install torchvision + command: .circleci/unittest/linux/scripts/install.sh + - run: + name: Build docs + command: | + set -ex + tag=${CIRCLE_TAG:1:5} + VERSION=${tag:-master} + eval "$(./conda/bin/conda shell.bash hook)" + conda activate ./env + pushd docs + pip install -r requirements.txt + make html + popd + - persist_to_workspace: + root: ./ + paths: + - "*" + - store_artifacts: + path: ./docs/build/html + destination: docs + + upload_docs: + <<: *binary_common + docker: + - image: "pytorch/manylinux-cuda100" + resource_class: 2xlarge+ + steps: + - attach_workspace: + at: ~/workspace + - run: + name: Generate netrc + command: | + # set credentials for https pushing + # requires the org-member context + cat > ~/.netrc \<> ~/.bashrc +RUN source /usr/local/etc/profile.d/conda.sh && conda activate python3.6 && conda install -y Pillow +RUN source /usr/local/etc/profile.d/conda.sh && conda activate python3.7 && conda install -y Pillow +RUN source /usr/local/etc/profile.d/conda.sh && conda activate python3.8 && conda install -y Pillow +CMD [ "/bin/bash"] diff --git a/.circleci/unittest/ios/scripts/binary_ios_build.sh b/.circleci/unittest/ios/scripts/binary_ios_build.sh new file mode 100755 index 00000000000..e2ad7b0c55f --- /dev/null +++ b/.circleci/unittest/ios/scripts/binary_ios_build.sh @@ -0,0 +1,47 @@ +#!/bin/bash +set -ex -o pipefail + +echo "" +echo "DIR: $(pwd)" +WORKSPACE=/Users/distiller/workspace +PROJ_ROOT_IOS=/Users/distiller/project/ios +PYTORCH_IOS_NIGHTLY_NAME=libtorch_ios_nightly_build.zip +export TCLLIBPATH="/usr/local/lib" + +# install conda +curl --retry 3 -o ~/conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh +chmod +x ~/conda.sh +/bin/bash ~/conda.sh -b -p ~/anaconda +export PATH="~/anaconda/bin:${PATH}" +source ~/anaconda/bin/activate + +# install dependencies +conda install numpy ninja pyyaml mkl mkl-include setuptools cmake cffi requests typing_extensions wget --yes +conda install -c conda-forge valgrind --yes +export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"} + +# sync submodules +cd ${PROJ_ROOT_IOS} +git submodule sync +git submodule update --init --recursive + +# download pytorch-iOS nightly build and unzip it +mkdir -p ${PROJ_ROOT_IOS}/lib +mkdir -p ${PROJ_ROOT_IOS}/build +mkdir -p ${PROJ_ROOT_IOS}/pytorch +TORCH_ROOT="${PROJ_ROOT_IOS}/pytorch" + +cd ${TORCH_ROOT} +wget https://ossci-ios-build.s3.amazonaws.com/${PYTORCH_IOS_NIGHTLY_NAME} +mkdir -p ./build_ios +unzip -d ./build_ios ./${PYTORCH_IOS_NIGHTLY_NAME} + +LIBTORCH_HEADER_ROOT="${TORCH_ROOT}/build_ios/install/include" +cd ${PROJ_ROOT_IOS} +IOS_ARCH=${IOS_ARCH} LIBTORCH_HEADER_ROOT=${LIBTORCH_HEADER_ROOT} ./build_ios.sh +rm -rf ${TORCH_ROOT} + +# store the binary +DEST_DIR=${WORKSPACE}/ios/${IOS_ARCH} +mkdir -p ${DEST_DIR} +cp ${PROJ_ROOT_IOS}/lib/*.a ${DEST_DIR} diff --git a/.circleci/unittest/ios/scripts/binary_ios_upload.sh b/.circleci/unittest/ios/scripts/binary_ios_upload.sh new file mode 100644 index 00000000000..ce56388e5da --- /dev/null +++ b/.circleci/unittest/ios/scripts/binary_ios_upload.sh @@ -0,0 +1,42 @@ +#!/bin/bash +set -ex -o pipefail + +echo "" +echo "DIR: $(pwd)" + +WORKSPACE=/Users/distiller/workspace +PROJ_ROOT=/Users/distiller/project +ARTIFACTS_DIR=${WORKSPACE}/ios +ls ${ARTIFACTS_DIR} +ZIP_DIR=${WORKSPACE}/zip +mkdir -p ${ZIP_DIR}/install/lib + +# build a FAT bianry +cd ${ZIP_DIR}/install/lib +libs=("${ARTIFACTS_DIR}/x86_64/libtorchvision_ops.a" "${ARTIFACTS_DIR}/arm64/libtorchvision_ops.a") +lipo -create "${libs[@]}" -o ${ZIP_DIR}/install/lib/libtorchvision_ops.a +lipo -i ${ZIP_DIR}/install/lib/*.a + +# copy the license +cp ${PROJ_ROOT}/LICENSE ${ZIP_DIR}/ +# zip the library +ZIPFILE=libtorchvision_ops_ios_nightly_build.zip +cd ${ZIP_DIR} +#for testing +touch version.txt +echo $(date +%s) > version.txt +zip -r ${ZIPFILE} install version.txt LICENSE + +# upload to aws +# Install conda then 'conda install' awscli +curl --retry 3 -o ~/conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh +chmod +x ~/conda.sh +/bin/bash ~/conda.sh -b -p ~/anaconda +export PATH="~/anaconda/bin:${PATH}" +source ~/anaconda/bin/activate +conda install -c conda-forge awscli --yes +set +x +export AWS_ACCESS_KEY_ID=${AWS_S3_ACCESS_KEY_FOR_PYTORCH_BINARY_UPLOAD} +export AWS_SECRET_ACCESS_KEY=${AWS_S3_ACCESS_SECRET_FOR_PYTORCH_BINARY_UPLOAD} +set -x +aws s3 cp ${ZIPFILE} s3://ossci-ios-build/ --acl public-read diff --git a/.circleci/unittest/linux/scripts/environment.yml b/.circleci/unittest/linux/scripts/environment.yml new file mode 100644 index 00000000000..dcad1abfa31 --- /dev/null +++ b/.circleci/unittest/linux/scripts/environment.yml @@ -0,0 +1,18 @@ +channels: + - pytorch + - defaults + # using conda-forge for python v3.9 + - conda-forge +dependencies: + - pytest + - pytest-cov + - codecov + - pip + - libpng + - jpeg + - ca-certificates + - pip: + - future + - pillow>=4.1.1 + - scipy + - av diff --git a/.circleci/unittest/linux/scripts/install.sh b/.circleci/unittest/linux/scripts/install.sh new file mode 100755 index 00000000000..bec090a491e --- /dev/null +++ b/.circleci/unittest/linux/scripts/install.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +unset PYTORCH_VERSION +# For unittest, nightly PyTorch is used as the following section, +# so no need to set PYTORCH_VERSION. +# In fact, keeping PYTORCH_VERSION forces us to hardcode PyTorch version in config. + +set -e + +eval "$(./conda/bin/conda shell.bash hook)" +conda activate ./env + +if [ "${CU_VERSION:-}" == cpu ] ; then + cudatoolkit="cpuonly" +else + if [[ ${#CU_VERSION} -eq 4 ]]; then + CUDA_VERSION="${CU_VERSION:2:1}.${CU_VERSION:3:1}" + elif [[ ${#CU_VERSION} -eq 5 ]]; then + CUDA_VERSION="${CU_VERSION:2:2}.${CU_VERSION:4:1}" + fi + echo "Using CUDA $CUDA_VERSION as determined by CU_VERSION" + version="$(python -c "print('.'.join(\"${CUDA_VERSION}\".split('.')[:2]))")" + cudatoolkit="cudatoolkit=${version}" +fi + +printf "Installing PyTorch with %s\n" "${cudatoolkit}" +conda install -y -c "pytorch-${UPLOAD_CHANNEL}" -c conda-forge "pytorch-${UPLOAD_CHANNEL}"::pytorch "${cudatoolkit}" + +if [ $PYTHON_VERSION == "3.6" ]; then + printf "Installing minimal PILLOW version\n" + # Install the minimal PILLOW version. Otherwise, let setup.py install the latest + pip install pillow==5.3.0 +fi + +printf "* Installing torchvision\n" +python setup.py develop diff --git a/.circleci/unittest/linux/scripts/post_process.sh b/.circleci/unittest/linux/scripts/post_process.sh new file mode 100755 index 00000000000..a84a0dea55e --- /dev/null +++ b/.circleci/unittest/linux/scripts/post_process.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +set -e + +eval "$(./conda/bin/conda shell.bash hook)" +conda activate ./env + +codecov diff --git a/travis-scripts/run-clang-format/run-clang-format.py b/.circleci/unittest/linux/scripts/run-clang-format.py similarity index 89% rename from travis-scripts/run-clang-format/run-clang-format.py rename to .circleci/unittest/linux/scripts/run-clang-format.py index 3f16c833b63..7bbd1acd0f4 100755 --- a/travis-scripts/run-clang-format/run-clang-format.py +++ b/.circleci/unittest/linux/scripts/run-clang-format.py @@ -1,4 +1,27 @@ #!/usr/bin/env python +""" +MIT License + +Copyright (c) 2017 Guillaume Papin + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" """A wrapper script around clang-format, suitable for linting multiple files and to use for continuous integration. @@ -8,8 +31,6 @@ """ -from __future__ import print_function, unicode_literals - import argparse import codecs import difflib @@ -30,7 +51,7 @@ DEVNULL = open(os.devnull, "wb") -DEFAULT_EXTENSIONS = 'c,h,C,H,cpp,hpp,cc,hh,c++,h++,cxx,hxx' +DEFAULT_EXTENSIONS = 'c,h,C,H,cpp,hpp,cc,hh,c++,h++,cxx,hxx,cu' class ExitStatus: @@ -129,11 +150,6 @@ def run_clang_format_diff(args, file): # > Each translation completely replaces the format string # > for the diagnostic. # > -- http://clang.llvm.org/docs/InternalsManual.html#internals-diag-translation - # - # It's not pretty, due to Python 2 & 3 compatibility. - encoding_py3 = {} - if sys.version_info[0] >= 3: - encoding_py3['encoding'] = 'utf-8' try: proc = subprocess.Popen( @@ -141,7 +157,7 @@ def run_clang_format_diff(args, file): stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True, - **encoding_py3) + encoding='utf-8') except OSError as exc: raise DiffError( "Command '{}' failed to start: {}".format( @@ -150,12 +166,7 @@ def run_clang_format_diff(args, file): ) proc_stdout = proc.stdout proc_stderr = proc.stderr - if sys.version_info[0] < 3: - # make the pipes compatible with Python 3, - # reading lines should output unicode - encoding = 'utf-8' - proc_stdout = codecs.getreader(encoding)(proc_stdout) - proc_stderr = codecs.getreader(encoding)(proc_stderr) + # hopefully the stderr pipe won't get full and block the process outs = list(proc_stdout.readlines()) errs = list(proc_stderr.readlines()) @@ -203,10 +214,7 @@ def red(s): def print_diff(diff_lines, use_color): if use_color: diff_lines = colorize(diff_lines) - if sys.version_info[0] < 3: - sys.stdout.writelines((l.encode('utf-8') for l in diff_lines)) - else: - sys.stdout.writelines(diff_lines) + sys.stdout.writelines(diff_lines) def print_trouble(prog, message, use_colors): diff --git a/.circleci/unittest/linux/scripts/run_test.sh b/.circleci/unittest/linux/scripts/run_test.sh new file mode 100755 index 00000000000..419b9eb562c --- /dev/null +++ b/.circleci/unittest/linux/scripts/run_test.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +set -e + +eval "$(./conda/bin/conda shell.bash hook)" +conda activate ./env + +export PYTORCH_TEST_WITH_SLOW='1' +python -m torch.utils.collect_env +pytest --cov=torchvision --junitxml=test-results/junit.xml -v --durations 20 test --ignore=test/test_datasets_download.py diff --git a/.circleci/unittest/linux/scripts/setup_env.sh b/.circleci/unittest/linux/scripts/setup_env.sh new file mode 100755 index 00000000000..773bd78f202 --- /dev/null +++ b/.circleci/unittest/linux/scripts/setup_env.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash + +# This script is for setting up environment in which unit test is ran. +# To speed up the CI time, the resulting environment is cached. +# +# Do not install PyTorch and torchvision here, otherwise they also get cached. + +set -e + +this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +root_dir="$(git rev-parse --show-toplevel)" +conda_dir="${root_dir}/conda" +env_dir="${root_dir}/env" + +cd "${root_dir}" + +case "$(uname -s)" in + Darwin*) os=MacOSX;; + *) os=Linux +esac + +# 1. Install conda at ./conda +if [ ! -d "${conda_dir}" ]; then + printf "* Installing conda\n" + wget -O miniconda.sh "http://repo.continuum.io/miniconda/Miniconda3-latest-${os}-x86_64.sh" + bash ./miniconda.sh -b -f -p "${conda_dir}" +fi +eval "$(${conda_dir}/bin/conda shell.bash hook)" + +# 2. Create test environment at ./env +if [ ! -d "${env_dir}" ]; then + printf "* Creating a test environment\n" + conda create --prefix "${env_dir}" -y python="$PYTHON_VERSION" +fi +conda activate "${env_dir}" + +# 3. Install Conda dependencies +printf "* Installing dependencies (except PyTorch)\n" +FFMPEG_PIN="=4.2" +if [[ "${PYTHON_VERSION}" = "3.9" ]]; then + FFMPEG_PIN=">=4.2" +fi + +conda install -y -c pytorch "ffmpeg${FFMPEG_PIN}" +conda env update --file "${this_dir}/environment.yml" --prune diff --git a/.circleci/unittest/windows/scripts/environment.yml b/.circleci/unittest/windows/scripts/environment.yml new file mode 100644 index 00000000000..b4f32cb3cad --- /dev/null +++ b/.circleci/unittest/windows/scripts/environment.yml @@ -0,0 +1,19 @@ +channels: + - pytorch + - defaults + # use conda-forge for python v3.9+ + - conda-forge +dependencies: + - pytest + - pytest-cov + - codecov + - pip + - libpng + - jpeg + - ca-certificates + - pip: + - future + - pillow>=4.1.1 + - scipy + - av + - dataclasses diff --git a/.circleci/unittest/windows/scripts/install.sh b/.circleci/unittest/windows/scripts/install.sh new file mode 100644 index 00000000000..ac5222a7b90 --- /dev/null +++ b/.circleci/unittest/windows/scripts/install.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +unset PYTORCH_VERSION +# For unittest, nightly PyTorch is used as the following section, +# so no need to set PYTORCH_VERSION. +# In fact, keeping PYTORCH_VERSION forces us to hardcode PyTorch version in config. + +set -e + +this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +eval "$(./conda/Scripts/conda.exe 'shell.bash' 'hook')" +conda activate ./env + +if [ "${CU_VERSION:-}" == cpu ] ; then + cudatoolkit="cpuonly" +else + if [[ ${#CU_VERSION} -eq 4 ]]; then + CUDA_VERSION="${CU_VERSION:2:1}.${CU_VERSION:3:1}" + elif [[ ${#CU_VERSION} -eq 5 ]]; then + CUDA_VERSION="${CU_VERSION:2:2}.${CU_VERSION:4:1}" + fi + echo "Using CUDA $CUDA_VERSION as determined by CU_VERSION" + version="$(python -c "print('.'.join(\"${CUDA_VERSION}\".split('.')[:2]))")" + cudatoolkit="cudatoolkit=${version}" +fi + +printf "Installing PyTorch with %s\n" "${cudatoolkit}" +conda install -y -c "pytorch-${UPLOAD_CHANNEL}" -c conda-forge "pytorch-${UPLOAD_CHANNEL}"::pytorch "${cudatoolkit}" + +if [ $PYTHON_VERSION == "3.6" ]; then + printf "Installing minimal PILLOW version\n" + # Install the minimal PILLOW version. Otherwise, let setup.py install the latest + pip install pillow==5.3.0 +fi + +printf "* Installing torchvision\n" +"$this_dir/vc_env_helper.bat" python setup.py develop diff --git a/packaging/conda/install_conda.bat b/.circleci/unittest/windows/scripts/install_conda.bat similarity index 59% rename from packaging/conda/install_conda.bat rename to .circleci/unittest/windows/scripts/install_conda.bat index 6052ad08b10..6612fba56f6 100644 --- a/packaging/conda/install_conda.bat +++ b/.circleci/unittest/windows/scripts/install_conda.bat @@ -1 +1 @@ -start /wait "" "%miniconda_exe%" /S /InstallationType=JustMe /RegisterPython=0 /AddToPath=0 /D=%tmp_conda% +start /wait "" "%miniconda_exe%" /S /InstallationType=JustMe /RegisterPython=0 /AddToPath=0 /D=%tmp_conda% \ No newline at end of file diff --git a/.circleci/unittest/windows/scripts/post_process.sh b/.circleci/unittest/windows/scripts/post_process.sh new file mode 100644 index 00000000000..b132113194b --- /dev/null +++ b/.circleci/unittest/windows/scripts/post_process.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +set -e + +eval "$(./conda/Scripts/conda.exe 'shell.bash' 'hook')" +conda activate ./env + +codecov diff --git a/.circleci/unittest/windows/scripts/run_test.sh b/.circleci/unittest/windows/scripts/run_test.sh new file mode 100644 index 00000000000..96d9cbd6b2d --- /dev/null +++ b/.circleci/unittest/windows/scripts/run_test.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +set -e + +eval "$(./conda/Scripts/conda.exe 'shell.bash' 'hook')" +conda activate ./env + +export PYTORCH_TEST_WITH_SLOW='1' +python -m torch.utils.collect_env +pytest --cov=torchvision --junitxml=test-results/junit.xml -v --durations 20 test --ignore=test/test_datasets_download.py diff --git a/.circleci/unittest/windows/scripts/setup_env.sh b/.circleci/unittest/windows/scripts/setup_env.sh new file mode 100644 index 00000000000..b0b70631112 --- /dev/null +++ b/.circleci/unittest/windows/scripts/setup_env.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +# This script is for setting up environment in which unit test is ran. +# To speed up the CI time, the resulting environment is cached. +# +# Do not install PyTorch and torchvision here, otherwise they also get cached. + +set -e + +this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +root_dir="$(git rev-parse --show-toplevel)" +conda_dir="${root_dir}/conda" +env_dir="${root_dir}/env" + +cd "${root_dir}" + +# 1. Install conda at ./conda +if [ ! -d "${conda_dir}" ]; then + printf "* Installing conda\n" + export tmp_conda="$(echo $conda_dir | tr '/' '\\')" + export miniconda_exe="$(echo $root_dir | tr '/' '\\')\\miniconda.exe" + curl --output miniconda.exe https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe -O + "$this_dir/install_conda.bat" + unset tmp_conda + unset miniconda_exe +fi + +eval "$(${conda_dir}/Scripts/conda.exe 'shell.bash' 'hook')" + +# 2. Create test environment at ./env +if [ ! -d "${env_dir}" ]; then + printf "* Creating a test environment\n" + conda create --prefix "${env_dir}" -y python="$PYTHON_VERSION" +fi +conda activate "${env_dir}" + +# 3. Install Conda dependencies +printf "* Installing dependencies (except PyTorch)\n" +conda env update --file "${this_dir}/environment.yml" --prune diff --git a/.circleci/unittest/windows/scripts/vc_env_helper.bat b/.circleci/unittest/windows/scripts/vc_env_helper.bat new file mode 100644 index 00000000000..9410135677a --- /dev/null +++ b/.circleci/unittest/windows/scripts/vc_env_helper.bat @@ -0,0 +1,39 @@ +@echo on + +set VC_VERSION_LOWER=16 +set VC_VERSION_UPPER=17 + +for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do ( + if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( + set "VS15INSTALLDIR=%%i" + set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" + goto vswhere + ) +) + +:vswhere +if "%VSDEVCMD_ARGS%" == "" ( + call "%VS15VCVARSALL%" x64 || exit /b 1 +) else ( + call "%VS15VCVARSALL%" x64 %VSDEVCMD_ARGS% || exit /b 1 +) + +@echo on + +set DISTUTILS_USE_SDK=1 + +set args=%1 +shift +:start +if [%1] == [] goto done +set args=%args% %1 +shift +goto start + +:done +if "%args%" == "" ( + echo Usage: vc_env_helper.bat [command] [args] + echo e.g. vc_env_helper.bat cl /c test.cpp +) + +%args% || exit /b 1 diff --git a/.gitattributes b/.gitattributes index a476e7afb59..22d0452f8d7 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,8 @@ *.pkl binary +# Jupyter notebook + +# For text count +# *.ipynb text + +# To ignore it use below +*.ipynb linguist-documentation diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md new file mode 100644 index 00000000000..7f64d09da50 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-report.md @@ -0,0 +1,52 @@ +--- +name: "\U0001F41B Bug Report" +about: Create a report to help us improve torchvision +title: '' +labels: '' +assignees: '' + +--- + +## 🐛 Bug + + + +## To Reproduce + +Steps to reproduce the behavior: + +1. +1. +1. + + + +## Expected behavior + + + +## Environment + +Please copy and paste the output from our +[environment collection script](https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py) +(or fill out the checklist below manually). + +You can get the script and run it with: +``` +wget https://raw.githubusercontent.com/pytorch/pytorch/master/torch/utils/collect_env.py +# For security purposes, please check the contents of collect_env.py before running it. +python collect_env.py +``` + + - PyTorch / torchvision Version (e.g., 1.0 / 0.4.0): + - OS (e.g., Linux): + - How you installed PyTorch / torchvision (`conda`, `pip`, source): + - Build command you used (if compiling from source): + - Python version: + - CUDA/cuDNN version: + - GPU models and configuration: + - Any other relevant information: + +## Additional context + + diff --git a/.github/ISSUE_TEMPLATE/documentation.md b/.github/ISSUE_TEMPLATE/documentation.md new file mode 100644 index 00000000000..a3618080a87 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/documentation.md @@ -0,0 +1,12 @@ +--- +name: "\U0001F4DA Documentation" +about: Report an issue related to https://pytorch.org/docs +title: '' +labels: '' +assignees: '' + +--- + +## 📚 Documentation + + diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md new file mode 100644 index 00000000000..2187981e6d4 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.md @@ -0,0 +1,27 @@ +--- +name: "\U0001F680Feature Request" +about: Submit a proposal/request for a new torchvision feature +title: '' +labels: '' +assignees: '' + +--- + +## 🚀 Feature + + +## Motivation + + + +## Pitch + + + +## Alternatives + + + +## Additional context + + diff --git a/.github/ISSUE_TEMPLATE/questions-help-support.md b/.github/ISSUE_TEMPLATE/questions-help-support.md new file mode 100644 index 00000000000..fb59e084128 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/questions-help-support.md @@ -0,0 +1,16 @@ +--- +name: "❓Questions/Help/Support" +about: Do you need support? We have resources. +title: '' +labels: '' +assignees: '' + +--- + +## ❓ Questions and Help + +### Please note that this issue tracker is not a help form and this issue will be closed. + +We have a set of [listed resources available on the website](https://pytorch.org/resources). Our primary means of support is our discussion forum: + +- [Discussion Forum](https://discuss.pytorch.org/) diff --git a/.github/failed_schedule_issue_template.md b/.github/failed_schedule_issue_template.md new file mode 100644 index 00000000000..5e2d77550ac --- /dev/null +++ b/.github/failed_schedule_issue_template.md @@ -0,0 +1,13 @@ +--- +title: Scheduled workflow failed +labels: + - bug + - "module: datasets" +--- + +Oh no, something went wrong in the scheduled workflow {{ env.WORKFLOW }}/{{ env.JOB }}. +Please look into it: + +https://github.com/{{ env.REPO }}/actions/runs/{{ env.ID }} + +Feel free to close this if this was just a one-off error. diff --git a/.github/pytorch-probot.yml b/.github/pytorch-probot.yml new file mode 100644 index 00000000000..27d0f2a1f0b --- /dev/null +++ b/.github/pytorch-probot.yml @@ -0,0 +1 @@ +tracking_issue: 2447 diff --git a/.github/workflows/bandit.yml b/.github/workflows/bandit.yml new file mode 100644 index 00000000000..93bae80f9bd --- /dev/null +++ b/.github/workflows/bandit.yml @@ -0,0 +1,23 @@ +# GitHub Actions Bandit Workflow + +name: Bandit + +on: + pull_request: + branches: [ master ] + + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + + # Task will fail if any high-severity issues are found + # Ignoring submodules + - name: Run Bandit Security Analysis + run: | + python -m pip install bandit + python -m bandit -r . -x ./third_party -lll diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 00000000000..387d82ec343 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,43 @@ +# GitHub Actions CodeQL Workflow + +name: CodeQL + +on: + pull_request: + branches: [ master ] + + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + + - name: Initialize CodeQL + uses: github/codeql-action/init@v1 + with: + languages: python, cpp + + - name: Install Ninja + run: | + sudo apt-get update -y + sudo apt-get install -y ninja-build + + - name: Update submodules + run: git submodule update --init --recursive + + - name: Install Torch + run: | + python -m pip install cmake + python -m pip install torch==1.8.1+cpu -f https://download.pytorch.org/whl/torch_stable.html + sudo ln -s /usr/bin/ninja /usr/bin/ninja-build + + - name: Build TorchVision + run: python setup.py develop --user + + # If any code scanning alerts are found, they will be under Security -> CodeQL + # Link: https://github.com/pytorch/vision/security/code-scanning + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v1 diff --git a/.github/workflows/tests-schedule.yml b/.github/workflows/tests-schedule.yml new file mode 100644 index 00000000000..65f805ce471 --- /dev/null +++ b/.github/workflows/tests-schedule.yml @@ -0,0 +1,54 @@ +name: tests + +on: + pull_request: + paths: + - "test/test_datasets_download.py" + - ".github/failed_schedule_issue_template.md" + - ".github/workflows/tests-schedule.yml" + + schedule: + - cron: "0 9 * * *" + +jobs: + download: + runs-on: ubuntu-latest + + steps: + - name: Set up python + uses: actions/setup-python@v2 + with: + python-version: 3.6 + + - name: Upgrade pip + run: python -m pip install --upgrade pip + + - name: Checkout repository + uses: actions/checkout@v2 + + - name: Install torch nightly build + run: pip install --pre torch -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html + + - name: Install torchvision + run: pip install -e . + + - name: Install all optional dataset requirements + run: pip install scipy pandas pycocotools lmdb requests + + - name: Install tests requirements + run: pip install pytest + + - name: Run tests + run: pytest -ra -v test/test_datasets_download.py + + - uses: JasonEtco/create-an-issue@v2.4.0 + name: Create issue if download tests failed + if: failure() && github.event_name == 'schedule' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPO: ${{ github.repository }} + WORKFLOW: ${{ github.workflow }} + JOB: ${{ github.job }} + ID: ${{ github.run_id }} + with: + filename: .github/failed_schedule_issue_template.md diff --git a/.gitignore b/.gitignore index 5f483c84327..e6e4e0f3728 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,9 @@ htmlcov *.swp *.swo gen.yml +.mypy_cache +.vscode/ +.idea/ +*.orig +*-checkpoint.ipynb +*.venv diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 69c34f0f690..00000000000 --- a/.travis.yml +++ /dev/null @@ -1,86 +0,0 @@ -language: python - -dist: xenial -matrix: - include: - - env: FORMAT_CHECK - language: cpp - addons: - apt: - sources: - - llvm-toolchain-xenial-7 - packages: - - clang-7 - - clang-format-7 - before_install: skip - install: skip - script: ./travis-scripts/run-clang-format/run-clang-format.py -r torchvision/csrc - - env: LINT_CHECK - python: "2.7" - install: pip install flake8 typing - script: flake8 --exclude .circleci - after_success: [] - - env: LINT_CHECK - python: "3.6" - install: pip install flake8 typing - script: flake8 .circleci - after_success: [] - - python: "2.7" - env: IMAGE_BACKEND=Pillow-SIMD - - python: "2.7" - - python: "3.6" - env: IMAGE_BACKEND=Pillow-SIMD - - python: "3.6" - -before_install: - - sudo apt-get update - - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; - - bash miniconda.sh -b -p $HOME/miniconda - - export PATH="$HOME/miniconda/bin:$PATH" - - hash -r - - conda config --set always_yes yes --set changeps1 no - # Useful for debugging any issues with conda - - conda info -a - - - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION pytorch scipy -c pytorch-nightly - - source activate test-environment - - | - if [[ "$IMAGE_BACKEND" == "Pillow-SIMD" ]]; then - pip uninstall -y pillow && CC="cc -march=native" pip install --force-reinstall pillow-simd - fi - - pip install future - - pip install pytest pytest-cov codecov - - pip install mock - - pip install typing - - | - if [[ $TRAVIS_PYTHON_VERSION == 3.6 ]]; then - pip install onnxruntime - fi - - conda install av -c conda-forge - - -install: - # Using pip instead of setup.py ensures we install a non-compressed version of the package - # (as opposed to an egg), which is necessary to collect coverage. - # We still get the benefit of testing an installed version over the - # test version to iron out installation file-inclusion bugs but can - # also collect coverage. - - pip install . - # Move to home dir, otherwise we'll end up with the path to the - # package in $PWD rather than the installed v - - | - cd $HOME - export TV_INSTALL_PATH="$(python -c 'import os; import torchvision; print(os.path.dirname(os.path.abspath(torchvision.__file__)))')" - echo "$TV_INSTALL_PATH" - cd - - -script: - - pytest --cov-config .coveragerc --cov torchvision --cov $TV_INSTALL_PATH -k 'not TestVideoReader and not TestVideoTransforms' test - - pytest test/test_hub.py - -after_success: - # Necessary to run coverage combine to rewrite paths from - # /travis/env/path/site-packages/torchvision to actual path - - coverage combine .coverage - - coverage report - - codecov diff --git a/CMakeLists.txt b/CMakeLists.txt index df77482c870..547ab7ddd2b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,16 +1,108 @@ -cmake_minimum_required(VERSION 2.8) +cmake_minimum_required(VERSION 3.12) project(torchvision) -set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD 14) +file(STRINGS version.txt TORCHVISION_VERSION) + +option(WITH_CUDA "Enable CUDA support" OFF) + +if(WITH_CUDA) + enable_language(CUDA) + add_definitions(-D__CUDA_NO_HALF_OPERATORS__) + add_definitions(-DWITH_CUDA) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr") +endif() + +find_package(Python3 COMPONENTS Development) find_package(Torch REQUIRED) +find_package(PNG REQUIRED) +find_package(JPEG REQUIRED) + +function(CUDA_CONVERT_FLAGS EXISTING_TARGET) + get_property(old_flags TARGET ${EXISTING_TARGET} PROPERTY INTERFACE_COMPILE_OPTIONS) + if(NOT "${old_flags}" STREQUAL "") + string(REPLACE ";" "," CUDA_flags "${old_flags}") + set_property(TARGET ${EXISTING_TARGET} PROPERTY INTERFACE_COMPILE_OPTIONS + "$<$>:${old_flags}>$<$>:-Xcompiler=${CUDA_flags}>" + ) + endif() +endfunction() + +if(MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4819") + if(WITH_CUDA) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=/wd4819") + foreach(diag cc_clobber_ignored integer_sign_change useless_using_declaration + set_but_not_used field_without_dll_interface + base_class_has_different_dll_interface + dll_interface_conflict_none_assumed + dll_interface_conflict_dllexport_assumed + implicit_return_from_non_void_function + unsigned_compare_with_zero + declared_but_not_referenced + bad_friend_decl) + string(APPEND CMAKE_CUDA_FLAGS " -Xcudafe --diag_suppress=${diag}") + endforeach() + CUDA_CONVERT_FLAGS(torch_cpu) + if(TARGET torch_cuda) + CUDA_CONVERT_FLAGS(torch_cuda) + endif() + if(TARGET torch_cuda_cu) + CUDA_CONVERT_FLAGS(torch_cuda_cu) + endif() + if(TARGET torch_cuda_cpp) + CUDA_CONVERT_FLAGS(torch_cuda_cpp) + endif() + endif() +endif() + +include(GNUInstallDirs) +include(CMakePackageConfigHelpers) + +set(TVCPP torchvision/csrc) +list(APPEND ALLOW_LISTED ${TVCPP} ${TVCPP}/io/image ${TVCPP}/io/image/cpu ${TVCPP}/models ${TVCPP}/ops + ${TVCPP}/ops/autograd ${TVCPP}/ops/cpu) +if(WITH_CUDA) + list(APPEND ALLOW_LISTED ${TVCPP}/ops/cuda ${TVCPP}/ops/autocast) +endif() + +FOREACH(DIR ${ALLOW_LISTED}) + file(GLOB ALL_SOURCES ${ALL_SOURCES} ${DIR}/*.*) +ENDFOREACH() + +add_library(${PROJECT_NAME} SHARED ${ALL_SOURCES}) +target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIBRARIES} ${PNG_LIBRARY} ${JPEG_LIBRARIES} Python3::Python) +set_target_properties(${PROJECT_NAME} PROPERTIES + EXPORT_NAME TorchVision + INSTALL_RPATH ${TORCH_INSTALL_PREFIX}/lib) + +include_directories(torchvision/csrc ${JPEG_INCLUDE_DIRS} ${PNG_INCLUDE_DIRS}) + +set(TORCHVISION_CMAKECONFIG_INSTALL_DIR "share/cmake/TorchVision" CACHE STRING "install path for TorchVisionConfig.cmake") + +configure_package_config_file(cmake/TorchVisionConfig.cmake.in + "${CMAKE_CURRENT_BINARY_DIR}/TorchVisionConfig.cmake" + INSTALL_DESTINATION ${TORCHVISION_CMAKECONFIG_INSTALL_DIR}) + +write_basic_package_version_file(${CMAKE_CURRENT_BINARY_DIR}/TorchVisionConfigVersion.cmake + VERSION ${TORCHVISION_VERSION} + COMPATIBILITY AnyNewerVersion) + +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/TorchVisionConfig.cmake + ${CMAKE_CURRENT_BINARY_DIR}/TorchVisionConfigVersion.cmake + DESTINATION ${TORCHVISION_CMAKECONFIG_INSTALL_DIR}) -file(GLOB HEADERS torchvision/csrc/vision.h) -file(GLOB MODELS_HEADERS torchvision/csrc/models/*.h) -file(GLOB MODELS_SOURCES torchvision/csrc/models/*.h torchvision/csrc/models/*.cpp) +install(TARGETS ${PROJECT_NAME} + EXPORT TorchVisionTargets + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) -add_library (${PROJECT_NAME} SHARED ${MODELS_SOURCES}) -target_link_libraries(${PROJECT_NAME} PUBLIC "${TORCH_LIBRARIES}") +install(EXPORT TorchVisionTargets + NAMESPACE TorchVision:: + DESTINATION ${TORCHVISION_CMAKECONFIG_INSTALL_DIR}) -install(TARGETS ${PROJECT_NAME} DESTINATION ${CMAKE_INSTALL_PREFIX}/lib) -install(FILES ${HEADERS} DESTINATION ${CMAKE_INSTALL_PREFIX}/include/${PROJECT_NAME}) -install(FILES ${MODELS_HEADERS} DESTINATION ${CMAKE_INSTALL_PREFIX}/include/${PROJECT_NAME}/models) +FOREACH(INPUT_DIR ${ALLOW_LISTED}) + string(REPLACE "${TVCPP}" "${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}" OUTPUT_DIR ${INPUT_DIR}) + file(GLOB INPUT_FILES ${INPUT_DIR}/*.*) + install(FILES ${INPUT_FILES} DESTINATION ${OUTPUT_DIR}) +ENDFOREACH() diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000000..b91e23b17c0 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,76 @@ +# Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to make participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, sex characteristics, gender identity and expression, +level of experience, education, socio-economic status, nationality, personal +appearance, race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or +advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic +address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a +professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies within all project spaces, and it also applies when +an individual is representing the project or its community in public spaces. +Examples of representing a project or community include using an official +project e-mail address, posting via an official social media account, or acting +as an appointed representative at an online or offline event. Representation of +a project may be further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at . All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see +https://www.contributor-covenant.org/faq diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000000..3fd20df6ca1 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,157 @@ +# Contributing to Torchvision + +We want to make contributing to this project as easy and transparent as possible. + +## TL;DR + +We appreciate all contributions. If you are interested in contributing to Torchvision, there are many ways to help out. +Your contributions may fall into the following categories: + +- It helps the project if you could + - Report issues you're facing + - Give a :+1: on issues that others reported and that are relevant to you + +- Answering queries on the issue tracker, investigating bugs are very valuable contributions to the project. + +- You would like to improve the documentation. This is no less important than improving the library itself! +If you find a typo in the documentation, do not hesitate to submit a GitHub pull request. + +- If you would like to fix a bug + - please pick one from the [list of open issues labelled as "help wanted"](https://github.com/pytorch/vision/issues?q=is%3Aopen+is%3Aissue+label%3A%22help+wanted%22) + - comment on the issue that you want to work on this issue + - send a PR with your fix, see below. + +- If you plan to contribute new features, utility functions or extensions, please first open an issue and discuss the feature with us. + +## Issues + +We use GitHub issues to track public bugs. Please ensure your description is +clear and has sufficient instructions to be able to reproduce the issue. + +## Development installation + +### Install PyTorch Nightly + +```bash +conda install pytorch -c pytorch-nightly -c conda-forge +# or with pip (see https://pytorch.org/get-started/locally/) +# pip install numpy +# pip install --pre torch -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html +``` + +### Install Torchvision + +```bash +git clone https://github.com/pytorch/vision.git +cd vision +python setup.py install +# or, for OSX +# MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py install +# for C++ debugging, please use DEBUG=1 +# DEBUG=1 python setup.py install +pip install flake8 typing mypy pytest scipy +``` +You may also have to install `libpng-dev` and `libjpeg-turbo8-dev` libraries: +```bash +conda install libpng jpeg +``` + +## Development Process + +If you plan to modify the code or documentation, please follow the steps below: + +1. Fork the repository and create your branch from `master`. +2. If you have modified the code (new feature or bug-fix), please add unit tests. +3. If you have changed APIs, update the documentation. Make sure the documentation builds. +4. Ensure the test suite passes. +5. Make sure your code passes `flake8` formatting check. + +For more details about pull requests, +please read [GitHub's guides](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request). + +If you would like to contribute a new model, please see [here](#New-model). + +If you would like to contribute a new dataset, please see [here](#New-dataset). + +### Code formatting and typing + +New code should be compatible with Python 3.X versions and be compliant with PEP8. To check the codebase, please run +```bash +flake8 --config=setup.cfg . +``` + +The codebase has type annotations, please make sure to add type hints if required. We use `mypy` tool for type checking: +```bash +mypy --config-file mypy.ini +``` + +### Unit tests + +If you have modified the code by adding a new feature or a bug-fix, please add unit tests for that. To run a specific +test: +```bash +pytest test/ -vvv -k +# e.g. pytest test/test_transforms.py -vvv -k test_center_crop +``` + +If you would like to run all tests: +```bash +pytest test -vvv +``` + +Tests that require internet access should be in +`test/test_internet.py`. + +### Documentation + +Torchvision uses [Google style](http://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) +for formatting docstrings. Length of line inside docstrings block must be limited to 120 characters. + +Please, follow the instructions to build and deploy the documentation locally. + +#### Install requirements + +```bash +cd docs +pip install -r requirements.txt +``` + +#### Build + +```bash +cd docs +make html +``` + +#### Local deployment + +Please, use python 3.X for the command below: +```bash +cd docs/build/html +python -m http.server +# e.g. python -m http.server 1234 +``` +Then open the browser at `0.0.0.0:` (e.g. `0.0.0.0:1234`) + +### New model + +More details on how to add a new model will be provided later. Please, do not send any PR with a new model without discussing +it in an issue as, most likely, it will not be accepted. + +### New dataset + +More details on how to add a new dataset will be provided later. Please, do not send any PR with a new dataset without discussing +it in an issue as, most likely, it will not be accepted. + +### Pull Request + +If all previous checks (flake8, mypy, unit tests) are passing, please send a PR. Submitted PR will pass other tests on +different operation systems, python versions and hardwares. + +For more details about pull requests workflow, +please read [GitHub's guides](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request). + +## License + +By contributing to Torchvision, you agree that your contributions will be licensed +under the LICENSE file in the root directory of this source tree. diff --git a/README.rst b/README.rst index 30dce8b4639..17b5e40af55 100644 --- a/README.rst +++ b/README.rst @@ -1,9 +1,6 @@ torchvision =========== -.. image:: https://travis-ci.org/pytorch/vision.svg?branch=master - :target: https://travis-ci.org/pytorch/vision - .. image:: https://codecov.io/gh/pytorch/vision/branch/master/graph/badge.svg :target: https://codecov.io/gh/pytorch/vision @@ -16,10 +13,45 @@ torchvision The torchvision package consists of popular datasets, model architectures, and common image transformations for computer vision. + Installation ============ -TorchVision requires PyTorch 1.2 or newer. +We recommend Anaconda as Python package management system. Please refer to `pytorch.org `_ +for the detail of PyTorch (``torch``) installation. The following is the corresponding ``torchvision`` versions and +supported Python versions. + ++--------------------------+--------------------------+---------------------------------+ +| ``torch`` | ``torchvision`` | ``python`` | ++==========================+==========================+=================================+ +| ``master`` / ``nightly`` | ``master`` / ``nightly`` | ``>=3.6`` | ++--------------------------+--------------------------+---------------------------------+ +| ``1.8.0`` | ``0.9.0`` | ``>=3.6`` | ++--------------------------+--------------------------+---------------------------------+ +| ``1.7.1`` | ``0.8.2`` | ``>=3.6`` | ++--------------------------+--------------------------+---------------------------------+ +| ``1.7.0`` | ``0.8.1`` | ``>=3.6`` | ++--------------------------+--------------------------+---------------------------------+ +| ``1.7.0`` | ``0.8.0`` | ``>=3.6`` | ++--------------------------+--------------------------+---------------------------------+ +| ``1.6.0`` | ``0.7.0`` | ``>=3.6`` | ++--------------------------+--------------------------+---------------------------------+ +| ``1.5.1`` | ``0.6.1`` | ``>=3.5`` | ++--------------------------+--------------------------+---------------------------------+ +| ``1.5.0`` | ``0.6.0`` | ``>=3.5`` | ++--------------------------+--------------------------+---------------------------------+ +| ``1.4.0`` | ``0.5.0`` | ``==2.7``, ``>=3.5``, ``<=3.8`` | ++--------------------------+--------------------------+---------------------------------+ +| ``1.3.1`` | ``0.4.2`` | ``==2.7``, ``>=3.5``, ``<=3.7`` | ++--------------------------+--------------------------+---------------------------------+ +| ``1.3.0`` | ``0.4.1`` | ``==2.7``, ``>=3.5``, ``<=3.7`` | ++--------------------------+--------------------------+---------------------------------+ +| ``1.2.0`` | ``0.4.0`` | ``==2.7``, ``>=3.5``, ``<=3.7`` | ++--------------------------+--------------------------+---------------------------------+ +| ``1.1.0`` | ``0.3.0`` | ``==2.7``, ``>=3.5``, ``<=3.7`` | ++--------------------------+--------------------------+---------------------------------+ +| ``<=1.0.1`` | ``0.2.2`` | ``==2.7``, ``>=3.5``, ``<=3.7`` | ++--------------------------+--------------------------+---------------------------------+ Anaconda: @@ -41,6 +73,10 @@ From source: # or, for OSX # MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py install + +In case building TorchVision from source fails, install the nightly version of PyTorch following +the linked guide on the `contributing page `_ and retry the install. + By default, GPU support is built if CUDA is found and ``torch.cuda.is_available()`` is true. It's possible to force building GPU support by setting ``FORCE_CUDA=1`` environment variable, which is useful when building a docker image. @@ -55,13 +91,23 @@ Torchvision currently supports the following image backends: * `accimage`_ - if installed can be activated by calling :code:`torchvision.set_image_backend('accimage')` +* `libpng`_ - can be installed via conda :code:`conda install libpng` or any of the package managers for debian-based and RHEL-based Linux distributions. + +* `libjpeg`_ - can be installed via conda :code:`conda install jpeg` or any of the package managers for debian-based and RHEL-based Linux distributions. `libjpeg-turbo`_ can be used as well. + +**Notes:** ``libpng`` and ``libjpeg`` must be available at compilation time in order to be available. Make sure that it is available on the standard library locations, +otherwise, add the include and library paths in the environment variables ``TORCHVISION_INCLUDE`` and ``TORCHVISION_LIBRARY``, respectively. + +.. _libpng : http://www.libpng.org/pub/png/libpng.html .. _Pillow : https://python-pillow.org/ .. _Pillow-SIMD : https://github.com/uploadcare/pillow-simd .. _accimage: https://github.com/pytorch/accimage +.. _libjpeg: http://ijg.org/ +.. _libjpeg-turbo: https://libjpeg-turbo.org/ C++ API ======= -TorchVision also offers a C++ API that contains C++ equivalent of python models. +TorchVision also offers a C++ API that contains C++ equivalent of python models. Installation From source: @@ -69,17 +115,36 @@ Installation From source: mkdir build cd build + # Add -DWITH_CUDA=on support for the CUDA if needed cmake .. - make + make make install +Once installed, the library can be accessed in cmake (after properly configuring ``CMAKE_PREFIX_PATH``) via the :code:`TorchVision::TorchVision` target: + +.. code:: rest + + find_package(TorchVision REQUIRED) + target_link_libraries(my-target PUBLIC TorchVision::TorchVision) + +The ``TorchVision`` package will also automatically look for the ``Torch`` package and add it as a dependency to ``my-target``, +so make sure that it is also available to cmake via the ``CMAKE_PREFIX_PATH``. + +For an example setup, take a look at ``examples/cpp/hello_world``. + +TorchVision Operators +--------------------- +In order to get the torchvision operators registered with torch (eg. for the JIT), all you need to do is to ensure that you +:code:`#include ` in your project. + Documentation ============= -You can find the API documentation on the pytorch website: http://pytorch.org/docs/master/torchvision/ +You can find the API documentation on the pytorch website: https://pytorch.org/docs/stable/torchvision/index.html Contributing ============ -We appreciate all contributions. If you are planning to contribute back bug-fixes, please do so without any further discussion. If you plan to contribute new features, utility functions or extensions, please first open an issue and discuss the feature with us. + +See the `CONTRIBUTING `_ file for how to help out. Disclaimer on Datasets ====================== diff --git a/android/.gitignore b/android/.gitignore new file mode 100644 index 00000000000..adcfad04c91 --- /dev/null +++ b/android/.gitignore @@ -0,0 +1,6 @@ +local.properties +**/*.iml +.gradle +.idea/* +.externalNativeBuild +build diff --git a/android/build.gradle b/android/build.gradle new file mode 100644 index 00000000000..8e5fb09f827 --- /dev/null +++ b/android/build.gradle @@ -0,0 +1,41 @@ +allprojects { + buildscript { + ext { + minSdkVersion = 21 + targetSdkVersion = 28 + compileSdkVersion = 28 + buildToolsVersion = '28.0.3' + + coreVersion = "1.2.0" + extJUnitVersion = "1.1.1" + runnerVersion = "1.2.0" + rulesVersion = "1.2.0" + junitVersion = "4.12" + + androidSupportAppCompatV7Version = "28.0.0" + fbjniJavaOnlyVersion = "0.0.3" + soLoaderNativeLoaderVersion = "0.8.0" + } + + repositories { + google() + mavenCentral() + jcenter() + } + + dependencies { + classpath 'com.android.tools.build:gradle:4.1.2' + classpath 'com.vanniktech:gradle-maven-publish-plugin:0.14.2' + } + } + + repositories { + google() + jcenter() + } +} + +ext.deps = [ + jsr305: 'com.google.code.findbugs:jsr305:3.0.1', +] + diff --git a/android/gradle.properties b/android/gradle.properties new file mode 100644 index 00000000000..87804c30107 --- /dev/null +++ b/android/gradle.properties @@ -0,0 +1,24 @@ +ABI_FILTERS=armeabi-v7a,arm64-v8a,x86,x86_64 + +VERSION_NAME=0.10.0-SNAPSHOT +GROUP=org.pytorch +MAVEN_GROUP=org.pytorch +SONATYPE_STAGING_PROFILE=orgpytorch +POM_URL=https://github.com/pytorch/vision/ +POM_SCM_URL=https://github.com/pytorch/vision.git +POM_SCM_CONNECTION=scm:git:https://github.com/pytorch/vision +POM_SCM_DEV_CONNECTION=scm:git:git@github.com:pytorch/vision.git +POM_LICENSE_NAME=BSD 3-Clause +POM_LICENSE_URL=https://github.com/pytorch/vision/blob/master/LICENSE +POM_ISSUES_URL=https://github.com/pytorch/vision/issues +POM_LICENSE_DIST=repo +POM_DEVELOPER_ID=pytorch +POM_DEVELOPER_NAME=pytorch + +# Gradle internals +android.useAndroidX=true +android.enableJetifier=true + +testAppAllVariantsEnabled=false + +org.gradle.jvmargs=-Xmx4096m diff --git a/android/gradle/wrapper/gradle-wrapper.jar b/android/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 00000000000..94336fcae91 Binary files /dev/null and b/android/gradle/wrapper/gradle-wrapper.jar differ diff --git a/android/gradle/wrapper/gradle-wrapper.properties b/android/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 00000000000..442d9132ea3 --- /dev/null +++ b/android/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,5 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-6.8.3-bin.zip +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/android/gradle_scripts/android_tasks.gradle b/android/gradle_scripts/android_tasks.gradle new file mode 100644 index 00000000000..6bba126b2f6 --- /dev/null +++ b/android/gradle_scripts/android_tasks.gradle @@ -0,0 +1,11 @@ +afterEvaluate { project -> + if (POM_PACKAGING == 'aar') { + task headersJar(type: Jar) { + archiveClassifier.set('headers') + from("$rootDir/cxx/") { + include '**/*.h' + } + } + artifacts.add('archives', headersJar) + } +} diff --git a/android/gradle_scripts/release.gradle b/android/gradle_scripts/release.gradle new file mode 100644 index 00000000000..ada97f33964 --- /dev/null +++ b/android/gradle_scripts/release.gradle @@ -0,0 +1,3 @@ +apply from: rootProject.file('gradle_scripts/android_tasks.gradle') + +apply plugin: 'com.vanniktech.maven.publish' diff --git a/android/gradlew b/android/gradlew new file mode 100755 index 00000000000..cccdd3d517f --- /dev/null +++ b/android/gradlew @@ -0,0 +1,172 @@ +#!/usr/bin/env sh + +############################################################################## +## +## Gradle start up script for UN*X +## +############################################################################## + +# Attempt to set APP_HOME +# Resolve links: $0 may be a link +PRG="$0" +# Need this for relative symlinks. +while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`"/$link" + fi +done +SAVED="`pwd`" +cd "`dirname \"$PRG\"`/" >/dev/null +APP_HOME="`pwd -P`" +cd "$SAVED" >/dev/null + +APP_NAME="Gradle" +APP_BASE_NAME=`basename "$0"` + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS="" + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD="maximum" + +warn () { + echo "$*" +} + +die () { + echo + echo "$*" + echo + exit 1 +} + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "`uname`" in + CYGWIN* ) + cygwin=true + ;; + Darwin* ) + darwin=true + ;; + MINGW* ) + msys=true + ;; + NONSTOP* ) + nonstop=true + ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD="java" + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then + MAX_FD_LIMIT=`ulimit -H -n` + if [ $? -eq 0 ] ; then + if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then + MAX_FD="$MAX_FD_LIMIT" + fi + ulimit -n $MAX_FD + if [ $? -ne 0 ] ; then + warn "Could not set maximum file descriptor limit: $MAX_FD" + fi + else + warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" + fi +fi + +# For Darwin, add options to specify how the application appears in the dock +if $darwin; then + GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" +fi + +# For Cygwin, switch paths to Windows format before running java +if $cygwin ; then + APP_HOME=`cygpath --path --mixed "$APP_HOME"` + CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` + JAVACMD=`cygpath --unix "$JAVACMD"` + + # We build the pattern for arguments to be converted via cygpath + ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` + SEP="" + for dir in $ROOTDIRSRAW ; do + ROOTDIRS="$ROOTDIRS$SEP$dir" + SEP="|" + done + OURCYGPATTERN="(^($ROOTDIRS))" + # Add a user-defined pattern to the cygpath arguments + if [ "$GRADLE_CYGPATTERN" != "" ] ; then + OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" + fi + # Now convert the arguments - kludge to limit ourselves to /bin/sh + i=0 + for arg in "$@" ; do + CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` + CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option + + if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition + eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` + else + eval `echo args$i`="\"$arg\"" + fi + i=$((i+1)) + done + case $i in + (0) set -- ;; + (1) set -- "$args0" ;; + (2) set -- "$args0" "$args1" ;; + (3) set -- "$args0" "$args1" "$args2" ;; + (4) set -- "$args0" "$args1" "$args2" "$args3" ;; + (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; + (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; + (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; + (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; + (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; + esac +fi + +# Escape application args +save () { + for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done + echo " " +} +APP_ARGS=$(save "$@") + +# Collect all arguments for the java command, following the shell quoting and substitution rules +eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" + +# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong +if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then + cd "$(dirname "$0")" +fi + +exec "$JAVACMD" "$@" diff --git a/android/gradlew.bat b/android/gradlew.bat new file mode 100644 index 00000000000..e95643d6a2c --- /dev/null +++ b/android/gradlew.bat @@ -0,0 +1,84 @@ +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS= + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto init + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto init + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:init +@rem Get command-line arguments, handling Windows variants + +if not "%OS%" == "Windows_NT" goto win9xME_args + +:win9xME_args +@rem Slurp the command line arguments. +set CMD_LINE_ARGS= +set _SKIP=2 + +:win9xME_args_slurp +if "x%~1" == "x" goto execute + +set CMD_LINE_ARGS=%* + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! +if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 +exit /b 1 + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/android/ops/CMakeLists.txt b/android/ops/CMakeLists.txt new file mode 100644 index 00000000000..ad42adbfa71 --- /dev/null +++ b/android/ops/CMakeLists.txt @@ -0,0 +1,47 @@ +cmake_minimum_required(VERSION 3.4.1) +set(TARGET torchvision_ops) +project(${TARGET} CXX) +set(CMAKE_CXX_STANDARD 14) + +string(APPEND CMAKE_CXX_FLAGS " -DMOBILE") + +set(build_DIR ${CMAKE_SOURCE_DIR}/build) +set(root_DIR ${CMAKE_CURRENT_LIST_DIR}/..) + +file(GLOB VISION_SRCS + ../../torchvision/csrc/ops/cpu/*.h + ../../torchvision/csrc/ops/cpu/*.cpp + ../../torchvision/csrc/ops/*.h + ../../torchvision/csrc/ops/*.cpp) + +add_library(${TARGET} SHARED + ${VISION_SRCS} +) + +file(GLOB PYTORCH_INCLUDE_DIRS "${build_DIR}/pytorch_android*.aar/headers") +file(GLOB PYTORCH_INCLUDE_DIRS_CSRC "${build_DIR}/pytorch_android*.aar/headers/torch/csrc/api/include") +file(GLOB PYTORCH_LINK_DIRS "${build_DIR}/pytorch_android*.aar/jni/${ANDROID_ABI}") + +target_compile_options(${TARGET} PRIVATE + -fexceptions +) + +set(BUILD_SUBDIR ${ANDROID_ABI}) + +find_library(PYTORCH_LIBRARY pytorch_jni + PATHS ${PYTORCH_LINK_DIRS} + NO_CMAKE_FIND_ROOT_PATH) + +find_library(FBJNI_LIBRARY fbjni + PATHS ${PYTORCH_LINK_DIRS} + NO_CMAKE_FIND_ROOT_PATH) + +target_include_directories(${TARGET} PRIVATE + ${PYTORCH_INCLUDE_DIRS} + ${PYTORCH_INCLUDE_DIRS_CSRC} +) + +target_link_libraries(${TARGET} PRIVATE + ${PYTORCH_LIBRARY} + ${FBJNI_LIBRARY} +) diff --git a/android/ops/build.gradle b/android/ops/build.gradle new file mode 100644 index 00000000000..773e09fb280 --- /dev/null +++ b/android/ops/build.gradle @@ -0,0 +1,94 @@ +apply plugin: 'com.android.library' +apply plugin: 'maven' + +repositories { + jcenter() + maven { + url "https://oss.sonatype.org/content/repositories/snapshots" + } + flatDir { + dirs 'aars' + } +} + +android { + configurations { + extractForNativeBuild + } + compileSdkVersion rootProject.compileSdkVersion + buildToolsVersion rootProject.buildToolsVersion + + + defaultConfig { + minSdkVersion rootProject.minSdkVersion + targetSdkVersion rootProject.targetSdkVersion + versionCode 0 + versionName "0.1" + + testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" + ndk { + abiFilters ABI_FILTERS.split(",") + } + } + + buildTypes { + debug { + minifyEnabled false + debuggable true + } + release { + minifyEnabled false + } + } + + externalNativeBuild { + cmake { + path "CMakeLists.txt" + } + } + + useLibrary 'android.test.runner' + useLibrary 'android.test.base' + useLibrary 'android.test.mock' +} + +dependencies { + implementation 'com.android.support:appcompat-v7:' + rootProject.androidSupportAppCompatV7Version + + implementation 'org.pytorch:pytorch_android:1.8.0-SNAPSHOT' + extractForNativeBuild 'org.pytorch:pytorch_android:1.8.0-SNAPSHOT' + + // For testing: deps on local aar files + //implementation(name: 'pytorch_android-release', ext: 'aar') + //extractForNativeBuild(name: 'pytorch_android-release', ext: 'aar') + //implementation 'com.facebook.fbjni:fbjni-java-only:0.0.3' +} + +task extractAARForNativeBuild { + doLast { + configurations.extractForNativeBuild.files.each { + def file = it.absoluteFile + copy { + from zipTree(file) + into "$buildDir/$file.name" + include "headers/**" + include "jni/**" + } + } + } +} + +tasks.whenTaskAdded { task -> + if (task.name.contains('externalNativeBuild')) { + task.dependsOn(extractAARForNativeBuild) + } +} + +apply from: rootProject.file('gradle_scripts/release.gradle') + +task sourcesJar(type: Jar) { + from android.sourceSets.main.java.srcDirs + classifier = 'sources' +} + +artifacts.add('archives', sourcesJar) diff --git a/android/ops/gradle.properties b/android/ops/gradle.properties new file mode 100644 index 00000000000..5a4ea2f3aba --- /dev/null +++ b/android/ops/gradle.properties @@ -0,0 +1,4 @@ +POM_NAME=torchvision ops +POM_DESCRIPTION=torchvision ops +POM_ARTIFACT_ID=torchvision_ops +POM_PACKAGING=aar diff --git a/android/ops/src/main/AndroidManifest.xml b/android/ops/src/main/AndroidManifest.xml new file mode 100644 index 00000000000..8ca386493c4 --- /dev/null +++ b/android/ops/src/main/AndroidManifest.xml @@ -0,0 +1 @@ + diff --git a/android/settings.gradle b/android/settings.gradle new file mode 100644 index 00000000000..6d34eb8d51a --- /dev/null +++ b/android/settings.gradle @@ -0,0 +1,4 @@ +include ':ops', ':test_app' + +project(':ops').projectDir = file('ops') +project(':test_app').projectDir = file('test_app/app') diff --git a/android/test_app/app/build.gradle b/android/test_app/app/build.gradle new file mode 100644 index 00000000000..cc6b4590261 --- /dev/null +++ b/android/test_app/app/build.gradle @@ -0,0 +1,134 @@ +apply plugin: 'com.android.application' + +repositories { + jcenter() + maven { + url "https://oss.sonatype.org/content/repositories/snapshots" + } + flatDir { + dirs 'aars' + } +} + +android { + configurations { + extractForNativeBuild + } + compileOptions { + sourceCompatibility 1.8 + targetCompatibility 1.8 + } + compileSdkVersion rootProject.compileSdkVersion + buildToolsVersion rootProject.buildToolsVersion + defaultConfig { + applicationId "org.pytorch.testapp" + minSdkVersion rootProject.minSdkVersion + targetSdkVersion rootProject.targetSdkVersion + versionCode 1 + versionName "1.0" + ndk { + abiFilters ABI_FILTERS.split(",") + } + externalNativeBuild { + cmake { + abiFilters ABI_FILTERS.split(",") + arguments "-DANDROID_STL=c++_shared" + } + } + buildConfigField("String", "MODULE_ASSET_NAME", "\"frcnn_mnetv3.pt\"") + buildConfigField("String", "LOGCAT_TAG", "@string/app_name") + buildConfigField("long[]", "INPUT_TENSOR_SHAPE", "new long[]{3, 96, 96}") + addManifestPlaceholders([APP_NAME: "@string/app_name", MAIN_ACTIVITY: "org.pytorch.testapp.MainActivity"]) + } + buildTypes { + debug { + minifyEnabled false + debuggable true + } + release { + minifyEnabled false + } + } + flavorDimensions "model", "activity", "build" + productFlavors { + frcnnMnetv3 { + dimension "model" + applicationIdSuffix ".frcnnMnetv3" + buildConfigField("String", "MODULE_ASSET_NAME", "\"frcnn_mnetv3.pt\"") + addManifestPlaceholders([APP_NAME: "TV_FRCNN_MNETV3"]) + buildConfigField("String", "LOGCAT_TAG", "\"pytorch-frcnn-mnetv3\"") + } + camera { + dimension "activity" + addManifestPlaceholders([APP_NAME: "TV_CAMERA_FRCNN"]) + addManifestPlaceholders([MAIN_ACTIVITY: "org.pytorch.testapp.CameraActivity"]) + } + base { + dimension "activity" + } + aar { + dimension "build" + } + local { + dimension "build" + } + } + packagingOptions { + doNotStrip '**.so' + } + + // Filtering for CI + if (!testAppAllVariantsEnabled.toBoolean()) { + variantFilter { variant -> + def names = variant.flavors*.name + if (names.contains("aar")) { + setIgnore(true) + } + } + } +} + +tasks.all { task -> + // Disable externalNativeBuild for all but nativeBuild variant + if (task.name.startsWith('externalNativeBuild') + && !task.name.contains('NativeBuild')) { + task.enabled = false + } +} + +dependencies { + implementation 'com.android.support:appcompat-v7:28.0.0' + implementation 'com.facebook.soloader:nativeloader:0.8.0' + localImplementation project(':ops') + + implementation 'org.pytorch:pytorch_android:1.8.0-SNAPSHOT' + implementation 'org.pytorch:pytorch_android_torchvision:1.8.0-SNAPSHOT' + + aarImplementation(name: 'pytorch_android-release', ext: 'aar') + aarImplementation(name: 'pytorch_android_torchvision-release', ext: 'aar') + + def camerax_version = "1.0.0-alpha05" + implementation "androidx.camera:camera-core:$camerax_version" + implementation "androidx.camera:camera-camera2:$camerax_version" + implementation 'com.google.android.material:material:1.0.0-beta01' +} + +task extractAARForNativeBuild { + doLast { + configurations.extractForNativeBuild.files.each { + def file = it.absoluteFile + copy { + from zipTree(file) + into "$buildDir/$file.name" + include "headers/**" + include "jni/**" + } + } + } +} + +tasks.whenTaskAdded { task -> + if (task.name.contains('externalNativeBuild')) { + task.dependsOn(extractAARForNativeBuild) + } +} diff --git a/android/test_app/app/src/main/AndroidManifest.xml b/android/test_app/app/src/main/AndroidManifest.xml new file mode 100644 index 00000000000..a83bf223bda --- /dev/null +++ b/android/test_app/app/src/main/AndroidManifest.xml @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + diff --git a/android/test_app/app/src/main/java/org/pytorch/testapp/BBox.java b/android/test_app/app/src/main/java/org/pytorch/testapp/BBox.java new file mode 100644 index 00000000000..6fd60791864 --- /dev/null +++ b/android/test_app/app/src/main/java/org/pytorch/testapp/BBox.java @@ -0,0 +1,22 @@ +package org.pytorch.testapp; + +class BBox { + public final float score; + public final float x0; + public final float y0; + public final float x1; + public final float y1; + + public BBox(float score, float x0, float y0, float x1, float y1) { + this.score = score; + this.x0 = x0; + this.y0 = y0; + this.x1 = x1; + this.y1 = y1; + } + + @Override + public String toString() { + return String.format("Box{score=%f x0=%f y0=%f x1=%f y1=%f", score, x0, y0, x1, y1); + } +} diff --git a/android/test_app/app/src/main/java/org/pytorch/testapp/CameraActivity.java b/android/test_app/app/src/main/java/org/pytorch/testapp/CameraActivity.java new file mode 100644 index 00000000000..1c427bb82ba --- /dev/null +++ b/android/test_app/app/src/main/java/org/pytorch/testapp/CameraActivity.java @@ -0,0 +1,432 @@ +package org.pytorch.testapp; + +import android.Manifest; +import android.content.Context; +import android.content.pm.PackageManager; +import android.graphics.Bitmap; +import android.graphics.Canvas; +import android.graphics.Color; +import android.graphics.Paint; +import android.graphics.Rect; +import android.os.Bundle; +import android.os.Handler; +import android.os.HandlerThread; +import android.os.SystemClock; +import android.util.DisplayMetrics; +import android.util.Log; +import android.util.Size; +import android.view.TextureView; +import android.view.ViewStub; +import android.widget.ImageView; +import android.widget.TextView; +import android.widget.Toast; +import androidx.annotation.Nullable; +import androidx.annotation.UiThread; +import androidx.annotation.WorkerThread; +import androidx.appcompat.app.AppCompatActivity; +import androidx.camera.core.CameraX; +import androidx.camera.core.ImageAnalysis; +import androidx.camera.core.ImageAnalysisConfig; +import androidx.camera.core.ImageProxy; +import androidx.camera.core.Preview; +import androidx.camera.core.PreviewConfig; +import androidx.core.app.ActivityCompat; +import com.facebook.soloader.nativeloader.NativeLoader; +import com.facebook.soloader.nativeloader.SystemDelegate; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.FloatBuffer; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import org.pytorch.IValue; +import org.pytorch.Module; +import org.pytorch.Tensor; + +public class CameraActivity extends AppCompatActivity { + + private static final float BBOX_SCORE_DRAW_THRESHOLD = 0.5f; + private static final String TAG = BuildConfig.LOGCAT_TAG; + private static final int TEXT_TRIM_SIZE = 4096; + private static final int RGB_MAX_CHANNEL_VALUE = 262143; + + private static final int REQUEST_CODE_CAMERA_PERMISSION = 200; + private static final String[] PERMISSIONS = {Manifest.permission.CAMERA}; + + static { + if (!NativeLoader.isInitialized()) { + NativeLoader.init(new SystemDelegate()); + } + NativeLoader.loadLibrary("pytorch_jni"); + NativeLoader.loadLibrary("torchvision_ops"); + } + + private Bitmap mInputTensorBitmap; + private Bitmap mBitmap; + private Canvas mCanvas; + + private long mLastAnalysisResultTime; + + protected HandlerThread mBackgroundThread; + protected Handler mBackgroundHandler; + protected Handler mUIHandler; + + private TextView mTextView; + private ImageView mCameraOverlay; + private StringBuilder mTextViewStringBuilder = new StringBuilder(); + + private Paint mBboxPaint; + + @Override + protected void onCreate(Bundle savedInstanceState) { + super.onCreate(savedInstanceState); + setContentView(R.layout.activity_camera); + mTextView = findViewById(R.id.text); + mCameraOverlay = findViewById(R.id.camera_overlay); + mUIHandler = new Handler(getMainLooper()); + startBackgroundThread(); + + if (ActivityCompat.checkSelfPermission(this, Manifest.permission.CAMERA) + != PackageManager.PERMISSION_GRANTED) { + ActivityCompat.requestPermissions(this, PERMISSIONS, REQUEST_CODE_CAMERA_PERMISSION); + } else { + setupCameraX(); + } + mBboxPaint = new Paint(); + mBboxPaint.setAntiAlias(true); + mBboxPaint.setDither(true); + mBboxPaint.setColor(Color.GREEN); + } + + @Override + protected void onPostCreate(@Nullable Bundle savedInstanceState) { + super.onPostCreate(savedInstanceState); + startBackgroundThread(); + } + + protected void startBackgroundThread() { + mBackgroundThread = new HandlerThread("ModuleActivity"); + mBackgroundThread.start(); + mBackgroundHandler = new Handler(mBackgroundThread.getLooper()); + } + + @Override + protected void onDestroy() { + stopBackgroundThread(); + super.onDestroy(); + } + + protected void stopBackgroundThread() { + mBackgroundThread.quitSafely(); + try { + mBackgroundThread.join(); + mBackgroundThread = null; + mBackgroundHandler = null; + } catch (InterruptedException e) { + Log.e(TAG, "Error on stopping background thread", e); + } + } + + @Override + public void onRequestPermissionsResult( + int requestCode, String[] permissions, int[] grantResults) { + if (requestCode == REQUEST_CODE_CAMERA_PERMISSION) { + if (grantResults[0] == PackageManager.PERMISSION_DENIED) { + Toast.makeText( + this, + "You can't use image classification example without granting CAMERA permission", + Toast.LENGTH_LONG) + .show(); + finish(); + } else { + setupCameraX(); + } + } + } + + private void setupCameraX() { + final TextureView textureView = + ((ViewStub) findViewById(R.id.camera_texture_view_stub)) + .inflate() + .findViewById(R.id.texture_view); + final PreviewConfig previewConfig = new PreviewConfig.Builder().build(); + final Preview preview = new Preview(previewConfig); + preview.setOnPreviewOutputUpdateListener( + new Preview.OnPreviewOutputUpdateListener() { + @Override + public void onUpdated(Preview.PreviewOutput output) { + textureView.setSurfaceTexture(output.getSurfaceTexture()); + } + }); + + final DisplayMetrics displayMetrics = new DisplayMetrics(); + getWindowManager().getDefaultDisplay().getMetrics(displayMetrics); + + final ImageAnalysisConfig imageAnalysisConfig = + new ImageAnalysisConfig.Builder() + .setTargetResolution(new Size(displayMetrics.widthPixels, displayMetrics.heightPixels)) + .setCallbackHandler(mBackgroundHandler) + .setImageReaderMode(ImageAnalysis.ImageReaderMode.ACQUIRE_LATEST_IMAGE) + .build(); + final ImageAnalysis imageAnalysis = new ImageAnalysis(imageAnalysisConfig); + imageAnalysis.setAnalyzer( + new ImageAnalysis.Analyzer() { + @Override + public void analyze(ImageProxy image, int rotationDegrees) { + if (SystemClock.elapsedRealtime() - mLastAnalysisResultTime < 500) { + return; + } + + final Result result = CameraActivity.this.analyzeImage(image, rotationDegrees); + + if (result != null) { + mLastAnalysisResultTime = SystemClock.elapsedRealtime(); + CameraActivity.this.runOnUiThread( + new Runnable() { + @Override + public void run() { + CameraActivity.this.handleResult(result); + } + }); + } + } + }); + + CameraX.bindToLifecycle(this, preview, imageAnalysis); + } + + private Module mModule; + private FloatBuffer mInputTensorBuffer; + private Tensor mInputTensor; + + private static int clamp0255(int x) { + if (x > 255) { + return 255; + } + return x < 0 ? 0 : x; + } + + protected void fillInputTensorBuffer( + ImageProxy image, int rotationDegrees, FloatBuffer inputTensorBuffer) { + + if (mInputTensorBitmap == null) { + final int tensorSize = Math.min(image.getWidth(), image.getHeight()); + mInputTensorBitmap = Bitmap.createBitmap(tensorSize, tensorSize, Bitmap.Config.ARGB_8888); + } + + ImageProxy.PlaneProxy[] planes = image.getPlanes(); + ImageProxy.PlaneProxy Y = planes[0]; + ImageProxy.PlaneProxy U = planes[1]; + ImageProxy.PlaneProxy V = planes[2]; + ByteBuffer yBuffer = Y.getBuffer(); + ByteBuffer uBuffer = U.getBuffer(); + ByteBuffer vBuffer = V.getBuffer(); + final int imageWidth = image.getWidth(); + final int imageHeight = image.getHeight(); + final int tensorSize = Math.min(imageWidth, imageHeight); + + int widthAfterRtn = imageWidth; + int heightAfterRtn = imageHeight; + boolean oddRotation = rotationDegrees == 90 || rotationDegrees == 270; + if (oddRotation) { + widthAfterRtn = imageHeight; + heightAfterRtn = imageWidth; + } + + int minSizeAfterRtn = Math.min(heightAfterRtn, widthAfterRtn); + int cropWidthAfterRtn = minSizeAfterRtn; + int cropHeightAfterRtn = minSizeAfterRtn; + + int cropWidthBeforeRtn = cropWidthAfterRtn; + int cropHeightBeforeRtn = cropHeightAfterRtn; + if (oddRotation) { + cropWidthBeforeRtn = cropHeightAfterRtn; + cropHeightBeforeRtn = cropWidthAfterRtn; + } + + int offsetX = (int) ((imageWidth - cropWidthBeforeRtn) / 2.f); + int offsetY = (int) ((imageHeight - cropHeightBeforeRtn) / 2.f); + + int yRowStride = Y.getRowStride(); + int yPixelStride = Y.getPixelStride(); + int uvRowStride = U.getRowStride(); + int uvPixelStride = U.getPixelStride(); + + float scale = cropWidthAfterRtn / tensorSize; + int yIdx, uvIdx, yi, ui, vi; + final int channelSize = tensorSize * tensorSize; + for (int y = 0; y < tensorSize; y++) { + for (int x = 0; x < tensorSize; x++) { + final int centerCropX = (int) Math.floor(x * scale); + final int centerCropY = (int) Math.floor(y * scale); + int srcX = centerCropX + offsetX; + int srcY = centerCropY + offsetY; + + if (rotationDegrees == 90) { + srcX = offsetX + centerCropY; + srcY = offsetY + (minSizeAfterRtn - 1) - centerCropX; + } else if (rotationDegrees == 180) { + srcX = offsetX + (minSizeAfterRtn - 1) - centerCropX; + srcY = offsetY + (minSizeAfterRtn - 1) - centerCropY; + } else if (rotationDegrees == 270) { + srcX = offsetX + (minSizeAfterRtn - 1) - centerCropY; + srcY = offsetY + centerCropX; + } + + yIdx = srcY * yRowStride + srcX * yPixelStride; + uvIdx = (srcY >> 1) * uvRowStride + (srcX >> 1) * uvPixelStride; + + yi = yBuffer.get(yIdx) & 0xff; + ui = uBuffer.get(uvIdx) & 0xff; + vi = vBuffer.get(uvIdx) & 0xff; + + yi = (yi - 16) < 0 ? 0 : (yi - 16); + ui -= 128; + vi -= 128; + + int a0 = 1192 * yi; + int ri = (a0 + 1634 * vi); + int gi = (a0 - 833 * vi - 400 * ui); + int bi = (a0 + 2066 * ui); + + ri = ri > RGB_MAX_CHANNEL_VALUE ? RGB_MAX_CHANNEL_VALUE : (ri < 0 ? 0 : ri); + gi = gi > RGB_MAX_CHANNEL_VALUE ? RGB_MAX_CHANNEL_VALUE : (gi < 0 ? 0 : gi); + bi = bi > RGB_MAX_CHANNEL_VALUE ? RGB_MAX_CHANNEL_VALUE : (bi < 0 ? 0 : bi); + + final int color = + 0xff000000 | ((ri << 6) & 0xff0000) | ((gi >> 2) & 0xff00) | ((bi >> 10) & 0xff); + mInputTensorBitmap.setPixel(x, y, color); + inputTensorBuffer.put(0 * channelSize + y * tensorSize + x, clamp0255(ri >> 10) / 255.f); + inputTensorBuffer.put(1 * channelSize + y * tensorSize + x, clamp0255(gi >> 10) / 255.f); + inputTensorBuffer.put(2 * channelSize + y * tensorSize + x, clamp0255(bi >> 10) / 255.f); + } + } + } + + public static String assetFilePath(Context context, String assetName) { + File file = new File(context.getFilesDir(), assetName); + if (file.exists() && file.length() > 0) { + return file.getAbsolutePath(); + } + + try (InputStream is = context.getAssets().open(assetName)) { + try (OutputStream os = new FileOutputStream(file)) { + byte[] buffer = new byte[4 * 1024]; + int read; + while ((read = is.read(buffer)) != -1) { + os.write(buffer, 0, read); + } + os.flush(); + } + return file.getAbsolutePath(); + } catch (IOException e) { + Log.e(TAG, "Error process asset " + assetName + " to file path"); + } + return null; + } + + @WorkerThread + @Nullable + protected Result analyzeImage(ImageProxy image, int rotationDegrees) { + Log.i(TAG, String.format("analyzeImage(%s, %d)", image, rotationDegrees)); + final int tensorSize = Math.min(image.getWidth(), image.getHeight()); + if (mModule == null) { + Log.i(TAG, "Loading module from asset '" + BuildConfig.MODULE_ASSET_NAME + "'"); + mInputTensorBuffer = Tensor.allocateFloatBuffer(3 * tensorSize * tensorSize); + mInputTensor = Tensor.fromBlob(mInputTensorBuffer, new long[] {3, tensorSize, tensorSize}); + final String modelFileAbsoluteFilePath = + new File(assetFilePath(this, BuildConfig.MODULE_ASSET_NAME)).getAbsolutePath(); + mModule = Module.load(modelFileAbsoluteFilePath); + } + + final long startTime = SystemClock.elapsedRealtime(); + fillInputTensorBuffer(image, rotationDegrees, mInputTensorBuffer); + + final long moduleForwardStartTime = SystemClock.elapsedRealtime(); + final IValue outputTuple = mModule.forward(IValue.listFrom(mInputTensor)); + final IValue out1 = outputTuple.toTuple()[1]; + final Map map = out1.toList()[0].toDictStringKey(); + + float[] boxesData = new float[] {}; + float[] scoresData = new float[] {}; + final List bboxes = new ArrayList<>(); + if (map.containsKey("boxes")) { + final Tensor boxesTensor = map.get("boxes").toTensor(); + final Tensor scoresTensor = map.get("scores").toTensor(); + boxesData = boxesTensor.getDataAsFloatArray(); + scoresData = scoresTensor.getDataAsFloatArray(); + final int n = scoresData.length; + for (int i = 0; i < n; i++) { + final BBox bbox = + new BBox( + scoresData[i], + boxesData[4 * i + 0], + boxesData[4 * i + 1], + boxesData[4 * i + 2], + boxesData[4 * i + 3]); + android.util.Log.i(TAG, String.format("Forward result %d: %s", i, bbox)); + bboxes.add(bbox); + } + } else { + android.util.Log.i(TAG, "Forward result empty"); + } + + final long moduleForwardDuration = SystemClock.elapsedRealtime() - moduleForwardStartTime; + final long analysisDuration = SystemClock.elapsedRealtime() - startTime; + return new Result(tensorSize, bboxes, moduleForwardDuration, analysisDuration); + } + + @UiThread + protected void handleResult(Result result) { + final int W = mCameraOverlay.getMeasuredWidth(); + final int H = mCameraOverlay.getMeasuredHeight(); + + final int size = Math.min(W, H); + final int offsetX = (W - size) / 2; + final int offsetY = (H - size) / 2; + + float scaleX = (float) size / result.tensorSize; + float scaleY = (float) size / result.tensorSize; + if (mBitmap == null) { + mBitmap = Bitmap.createBitmap(W, H, Bitmap.Config.ARGB_8888); + mCanvas = new Canvas(mBitmap); + } + + mCanvas.drawBitmap( + mInputTensorBitmap, + new Rect(0, 0, result.tensorSize, result.tensorSize), + new Rect(offsetX, offsetY, offsetX + size, offsetY + size), + null); + + for (final BBox bbox : result.bboxes) { + if (bbox.score < BBOX_SCORE_DRAW_THRESHOLD) { + continue; + } + + float c_x0 = offsetX + scaleX * bbox.x0; + float c_y0 = offsetY + scaleY * bbox.y0; + + float c_x1 = offsetX + scaleX * bbox.x1; + float c_y1 = offsetY + scaleY * bbox.y1; + + mCanvas.drawLine(c_x0, c_y0, c_x1, c_y0, mBboxPaint); + mCanvas.drawLine(c_x1, c_y0, c_x1, c_y1, mBboxPaint); + mCanvas.drawLine(c_x1, c_y1, c_x0, c_y1, mBboxPaint); + mCanvas.drawLine(c_x0, c_y1, c_x0, c_y0, mBboxPaint); + mCanvas.drawText(String.format("%.2f", bbox.score), c_x0, c_y0, mBboxPaint); + } + mCameraOverlay.setImageBitmap(mBitmap); + + String message = String.format("forwardDuration:%d", result.moduleForwardDuration); + Log.i(TAG, message); + mTextViewStringBuilder.insert(0, '\n').insert(0, message); + if (mTextViewStringBuilder.length() > TEXT_TRIM_SIZE) { + mTextViewStringBuilder.delete(TEXT_TRIM_SIZE, mTextViewStringBuilder.length()); + } + mTextView.setText(mTextViewStringBuilder.toString()); + } +} diff --git a/android/test_app/app/src/main/java/org/pytorch/testapp/MainActivity.java b/android/test_app/app/src/main/java/org/pytorch/testapp/MainActivity.java new file mode 100644 index 00000000000..a9c13bffa6e --- /dev/null +++ b/android/test_app/app/src/main/java/org/pytorch/testapp/MainActivity.java @@ -0,0 +1,159 @@ +package org.pytorch.testapp; + +import android.os.Bundle; +import android.os.Handler; +import android.os.HandlerThread; +import android.os.SystemClock; +import android.util.Log; +import android.widget.TextView; +import androidx.annotation.Nullable; +import androidx.annotation.UiThread; +import androidx.annotation.WorkerThread; +import androidx.appcompat.app.AppCompatActivity; +import com.facebook.soloader.nativeloader.NativeLoader; +import com.facebook.soloader.nativeloader.SystemDelegate; +import java.nio.FloatBuffer; +import java.util.Map; +import org.pytorch.IValue; +import org.pytorch.Module; +import org.pytorch.PyTorchAndroid; +import org.pytorch.Tensor; + +public class MainActivity extends AppCompatActivity { + static { + if (!NativeLoader.isInitialized()) { + NativeLoader.init(new SystemDelegate()); + } + NativeLoader.loadLibrary("pytorch_jni"); + NativeLoader.loadLibrary("torchvision_ops"); + } + + private static final String TAG = BuildConfig.LOGCAT_TAG; + private static final int TEXT_TRIM_SIZE = 4096; + + private TextView mTextView; + + protected HandlerThread mBackgroundThread; + protected Handler mBackgroundHandler; + private Module mModule; + private FloatBuffer mInputTensorBuffer; + private Tensor mInputTensor; + private StringBuilder mTextViewStringBuilder = new StringBuilder(); + + private final Runnable mModuleForwardRunnable = + new Runnable() { + @Override + public void run() { + final Result result = doModuleForward(); + runOnUiThread( + () -> { + handleResult(result); + if (mBackgroundHandler != null) { + mBackgroundHandler.post(mModuleForwardRunnable); + } + }); + } + }; + + @Override + protected void onCreate(Bundle savedInstanceState) { + super.onCreate(savedInstanceState); + setContentView(R.layout.activity_main); + mTextView = findViewById(R.id.text); + startBackgroundThread(); + mBackgroundHandler.post(mModuleForwardRunnable); + } + + protected void startBackgroundThread() { + mBackgroundThread = new HandlerThread(TAG + "_bg"); + mBackgroundThread.start(); + mBackgroundHandler = new Handler(mBackgroundThread.getLooper()); + } + + @Override + protected void onDestroy() { + stopBackgroundThread(); + super.onDestroy(); + } + + protected void stopBackgroundThread() { + mBackgroundThread.quitSafely(); + try { + mBackgroundThread.join(); + mBackgroundThread = null; + mBackgroundHandler = null; + } catch (InterruptedException e) { + Log.e(TAG, "Error stopping background thread", e); + } + } + + @WorkerThread + @Nullable + protected Result doModuleForward() { + if (mModule == null) { + final long[] shape = BuildConfig.INPUT_TENSOR_SHAPE; + long numElements = 1; + for (int i = 0; i < shape.length; i++) { + numElements *= shape[i]; + } + mInputTensorBuffer = Tensor.allocateFloatBuffer((int) numElements); + mInputTensor = Tensor.fromBlob(mInputTensorBuffer, BuildConfig.INPUT_TENSOR_SHAPE); + PyTorchAndroid.setNumThreads(1); + mModule = PyTorchAndroid.loadModuleFromAsset(getAssets(), BuildConfig.MODULE_ASSET_NAME); + } + + final long startTime = SystemClock.elapsedRealtime(); + final long moduleForwardStartTime = SystemClock.elapsedRealtime(); + final IValue outputTuple = mModule.forward(IValue.listFrom(mInputTensor)); + final IValue[] outputArray = outputTuple.toTuple(); + final IValue out0 = outputArray[0]; + final Map map = out0.toDictStringKey(); + if (map.containsKey("boxes")) { + final Tensor boxes = map.get("boxes").toTensor(); + final Tensor scores = map.get("scores").toTensor(); + final float[] boxesData = boxes.getDataAsFloatArray(); + final float[] scoresData = scores.getDataAsFloatArray(); + final int n = scoresData.length; + for (int i = 0; i < n; i++) { + android.util.Log.i( + TAG, + String.format( + "Forward result %d: score %f box:(%f, %f, %f, %f)", + scoresData[i], + boxesData[4 * i + 0], + boxesData[4 * i + 1], + boxesData[4 * i + 2], + boxesData[4 * i + 3])); + } + } else { + android.util.Log.i(TAG, "Forward result empty"); + } + + final long moduleForwardDuration = SystemClock.elapsedRealtime() - moduleForwardStartTime; + final long analysisDuration = SystemClock.elapsedRealtime() - startTime; + return new Result(new float[] {}, moduleForwardDuration, analysisDuration); + } + + static class Result { + + private final float[] scores; + private final long totalDuration; + private final long moduleForwardDuration; + + public Result(float[] scores, long moduleForwardDuration, long totalDuration) { + this.scores = scores; + this.moduleForwardDuration = moduleForwardDuration; + this.totalDuration = totalDuration; + } + } + + @UiThread + protected void handleResult(Result result) { + String message = String.format("forwardDuration:%d", result.moduleForwardDuration); + mTextViewStringBuilder.insert(0, '\n').insert(0, message); + if (mTextViewStringBuilder.length() > TEXT_TRIM_SIZE) { + mTextViewStringBuilder.delete(TEXT_TRIM_SIZE, mTextViewStringBuilder.length()); + } + mTextView.setText(mTextViewStringBuilder.toString()); + } +} diff --git a/android/test_app/app/src/main/java/org/pytorch/testapp/Result.java b/android/test_app/app/src/main/java/org/pytorch/testapp/Result.java new file mode 100644 index 00000000000..ed7ebd006cd --- /dev/null +++ b/android/test_app/app/src/main/java/org/pytorch/testapp/Result.java @@ -0,0 +1,17 @@ +package org.pytorch.testapp; + +import java.util.List; + +class Result { + public final int tensorSize; + public final List bboxes; + public final long totalDuration; + public final long moduleForwardDuration; + + public Result(int tensorSize, List bboxes, long moduleForwardDuration, long totalDuration) { + this.tensorSize = tensorSize; + this.bboxes = bboxes; + this.moduleForwardDuration = moduleForwardDuration; + this.totalDuration = totalDuration; + } +} diff --git a/android/test_app/app/src/main/res/layout/activity_camera.xml b/android/test_app/app/src/main/res/layout/activity_camera.xml new file mode 100644 index 00000000000..7ba2e42b7c0 --- /dev/null +++ b/android/test_app/app/src/main/res/layout/activity_camera.xml @@ -0,0 +1,28 @@ + + + + + + + + + diff --git a/android/test_app/app/src/main/res/layout/activity_main.xml b/android/test_app/app/src/main/res/layout/activity_main.xml new file mode 100644 index 00000000000..c0939ebc0eb --- /dev/null +++ b/android/test_app/app/src/main/res/layout/activity_main.xml @@ -0,0 +1,17 @@ + + + + + + \ No newline at end of file diff --git a/android/test_app/app/src/main/res/layout/texture_view.xml b/android/test_app/app/src/main/res/layout/texture_view.xml new file mode 100644 index 00000000000..6518c6c84c6 --- /dev/null +++ b/android/test_app/app/src/main/res/layout/texture_view.xml @@ -0,0 +1,5 @@ + + diff --git a/android/test_app/app/src/main/res/mipmap-mdpi/ic_launcher.png b/android/test_app/app/src/main/res/mipmap-mdpi/ic_launcher.png new file mode 100644 index 00000000000..64ba76f75e9 Binary files /dev/null and b/android/test_app/app/src/main/res/mipmap-mdpi/ic_launcher.png differ diff --git a/android/test_app/app/src/main/res/mipmap-mdpi/ic_launcher_round.png b/android/test_app/app/src/main/res/mipmap-mdpi/ic_launcher_round.png new file mode 100644 index 00000000000..dae5e082342 Binary files /dev/null and b/android/test_app/app/src/main/res/mipmap-mdpi/ic_launcher_round.png differ diff --git a/android/test_app/app/src/main/res/values/colors.xml b/android/test_app/app/src/main/res/values/colors.xml new file mode 100644 index 00000000000..69b22338c65 --- /dev/null +++ b/android/test_app/app/src/main/res/values/colors.xml @@ -0,0 +1,6 @@ + + + #008577 + #00574B + #D81B60 + diff --git a/android/test_app/app/src/main/res/values/strings.xml b/android/test_app/app/src/main/res/values/strings.xml new file mode 100644 index 00000000000..cafbaad1511 --- /dev/null +++ b/android/test_app/app/src/main/res/values/strings.xml @@ -0,0 +1,3 @@ + + TV_FRCNN + diff --git a/android/test_app/app/src/main/res/values/styles.xml b/android/test_app/app/src/main/res/values/styles.xml new file mode 100644 index 00000000000..5885930df6d --- /dev/null +++ b/android/test_app/app/src/main/res/values/styles.xml @@ -0,0 +1,11 @@ + + + + + + diff --git a/android/test_app/make_assets.py b/android/test_app/make_assets.py new file mode 100644 index 00000000000..7860c759a57 --- /dev/null +++ b/android/test_app/make_assets.py @@ -0,0 +1,17 @@ +import torch +import torchvision +from torch.utils.mobile_optimizer import optimize_for_mobile + +print(torch.__version__) + +model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn( + pretrained=True, + box_score_thresh=0.7, + rpn_post_nms_top_n_test=100, + rpn_score_thresh=0.4, + rpn_pre_nms_top_n_test=150) + +model.eval() +script_model = torch.jit.script(model) +opt_script_model = optimize_for_mobile(script_model) +opt_script_model.save("app/src/main/assets/frcnn_mnetv3.pt") diff --git a/cmake/TorchVisionConfig.cmake.in b/cmake/TorchVisionConfig.cmake.in new file mode 100644 index 00000000000..42a3d566166 --- /dev/null +++ b/cmake/TorchVisionConfig.cmake.in @@ -0,0 +1,43 @@ +# TorchVisionConfig.cmake +# -------------------- +# +# Exported targets:: Vision +# + +@PACKAGE_INIT@ + +set(PN TorchVision) + +# location of include/torchvision +set(${PN}_INCLUDE_DIR "${PACKAGE_PREFIX_DIR}/@CMAKE_INSTALL_INCLUDEDIR@") + +set(${PN}_LIBRARY "") +set(${PN}_DEFINITIONS USING_${PN}) + +check_required_components(${PN}) + + +if(NOT (CMAKE_VERSION VERSION_LESS 3.0)) +#----------------------------------------------------------------------------- +# Don't include targets if this file is being picked up by another +# project which has already built this as a subproject +#----------------------------------------------------------------------------- +if(NOT TARGET ${PN}::TorchVision) +include("${CMAKE_CURRENT_LIST_DIR}/${PN}Targets.cmake") + +if(NOT TARGET torch_library) +find_package(Torch REQUIRED) +endif() +if(NOT TARGET Python3::Python) +find_package(Python3 COMPONENTS Development) +endif() + +set_target_properties(TorchVision::TorchVision PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${${PN}_INCLUDE_DIR}" INTERFACE_LINK_LIBRARIES "torch;Python3::Python" ) + + +if(@WITH_CUDA@) + target_compile_definitions(TorchVision::TorchVision INTERFACE WITH_CUDA) +endif() + +endif() +endif() diff --git a/cmake/iOS.cmake b/cmake/iOS.cmake new file mode 100644 index 00000000000..d42ea4c9232 --- /dev/null +++ b/cmake/iOS.cmake @@ -0,0 +1,207 @@ +# This file is based off of the Platform/Darwin.cmake and Platform/UnixPaths.cmake +# files which are included with CMake 2.8.4 +# It has been altered for iOS development + +# Options: +# +# IOS_PLATFORM = OS (default) or SIMULATOR +# This decides if SDKS will be selected from the iPhoneOS.platform or iPhoneSimulator.platform folders +# OS - the default, used to build for iPhone and iPad physical devices, which have an arm arch. +# SIMULATOR - used to build for the Simulator platforms, which have an x86 arch. +# +# CMAKE_IOS_DEVELOPER_ROOT = automatic(default) or /path/to/platform/Developer folder +# By default this location is automatcially chosen based on the IOS_PLATFORM value above. +# If set manually, it will override the default location and force the user of a particular Developer Platform +# +# CMAKE_IOS_SDK_ROOT = automatic(default) or /path/to/platform/Developer/SDKs/SDK folder +# By default this location is automatcially chosen based on the CMAKE_IOS_DEVELOPER_ROOT value. +# In this case it will always be the most up-to-date SDK found in the CMAKE_IOS_DEVELOPER_ROOT path. +# If set manually, this will force the use of a specific SDK version + +# Macros: +# +# set_xcode_property (TARGET XCODE_PROPERTY XCODE_VALUE) +# A convenience macro for setting xcode specific properties on targets +# example: set_xcode_property (myioslib IPHONEOS_DEPLOYMENT_TARGET "3.1") +# +# find_host_package (PROGRAM ARGS) +# A macro used to find executable programs on the host system, not within the iOS environment. +# Thanks to the android-cmake project for providing the command + +# Standard settings +set(CMAKE_SYSTEM_NAME Darwin) +set(CMAKE_SYSTEM_VERSION 1) +set(UNIX True) +set(APPLE True) +set(IOS True) + +# Required as of cmake 2.8.10 +set(CMAKE_OSX_DEPLOYMENT_TARGET "" CACHE STRING "Force unset of the deployment target for iOS" FORCE) + +# Determine the cmake host system version so we know where to find the iOS SDKs +find_program(CMAKE_UNAME uname /bin /usr/bin /usr/local/bin) +if(CMAKE_UNAME) + exec_program(uname ARGS -r OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_VERSION) + string(REGEX REPLACE "^([0-9]+)\\.([0-9]+).*$" "\\1" DARWIN_MAJOR_VERSION "${CMAKE_HOST_SYSTEM_VERSION}") +endif(CMAKE_UNAME) + +# Force the compilers to gcc for iOS +set(CMAKE_C_COMPILER /usr/bin/gcc CACHE STRING "") +set(CMAKE_CXX_COMPILER /usr/bin/g++ CACHE STRING "") +set(CMAKE_AR ar CACHE FILEPATH "" FORCE) +set(CMAKE_RANLIB ranlib CACHE FILEPATH "" FORCE) +set(PKG_CONFIG_EXECUTABLE pkg-config CACHE FILEPATH "" FORCE) + +# Setup iOS platform unless specified manually with IOS_PLATFORM +if(NOT DEFINED IOS_PLATFORM) + set(IOS_PLATFORM "OS") +endif(NOT DEFINED IOS_PLATFORM) +set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING "Type of iOS Platform") + +# Check the platform selection and setup for developer root +if(${IOS_PLATFORM} STREQUAL "OS") + set(IOS_PLATFORM_LOCATION "iPhoneOS.platform") + set(XCODE_IOS_PLATFORM iphoneos) + + # This causes the installers to properly locate the output libraries + set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphoneos") +elseif(${IOS_PLATFORM} STREQUAL "SIMULATOR") + set(IOS_PLATFORM_LOCATION "iPhoneSimulator.platform") + set(XCODE_IOS_PLATFORM iphonesimulator) + + # This causes the installers to properly locate the output libraries + set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphonesimulator") +elseif(${IOS_PLATFORM} STREQUAL "WATCHOS") + set(IOS_PLATFORM_LOCATION "WatchOS.platform") + set(XCODE_IOS_PLATFORM watchos) + + # This causes the installers to properly locate the output libraries + set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-watchos") +else(${IOS_PLATFORM} STREQUAL "OS") + message(FATAL_ERROR + "Unsupported IOS_PLATFORM value selected. " + "Please choose OS, SIMULATOR, or WATCHOS.") +endif() + +# All iOS/Darwin specific settings - some may be redundant +set(CMAKE_SHARED_LIBRARY_PREFIX "lib") +set(CMAKE_SHARED_LIBRARY_SUFFIX ".dylib") +set(CMAKE_SHARED_MODULE_PREFIX "lib") +set(CMAKE_SHARED_MODULE_SUFFIX ".so") +set(CMAKE_MODULE_EXISTS 1) +set(CMAKE_DL_LIBS "") + +set(CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG "-compatibility_version ") +set(CMAKE_C_OSX_CURRENT_VERSION_FLAG "-current_version ") +set(CMAKE_CXX_OSX_COMPATIBILITY_VERSION_FLAG "${CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG}") +set(CMAKE_CXX_OSX_CURRENT_VERSION_FLAG "${CMAKE_C_OSX_CURRENT_VERSION_FLAG}") + +if(IOS_DEPLOYMENT_TARGET) + set(XCODE_IOS_PLATFORM_VERSION_FLAGS "-m${XCODE_IOS_PLATFORM}-version-min=${IOS_DEPLOYMENT_TARGET}") +endif() + +# Hidden visibilty is required for cxx on iOS +set(CMAKE_C_FLAGS_INIT "${XCODE_IOS_PLATFORM_VERSION_FLAGS}") +set(CMAKE_CXX_FLAGS_INIT "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -fvisibility-inlines-hidden") + +set(CMAKE_C_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_C_LINK_FLAGS}") +set(CMAKE_CXX_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_CXX_LINK_FLAGS}") + +set(CMAKE_PLATFORM_HAS_INSTALLNAME 1) +set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib -headerpad_max_install_names") +set(CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle -headerpad_max_install_names") +set(CMAKE_SHARED_MODULE_LOADER_C_FLAG "-Wl,-bundle_loader,") +set(CMAKE_SHARED_MODULE_LOADER_CXX_FLAG "-Wl,-bundle_loader,") +set(CMAKE_FIND_LIBRARY_SUFFIXES ".dylib" ".so" ".a") + +# hack: if a new cmake (which uses CMAKE_INSTALL_NAME_TOOL) runs on an old build tree +# (where install_name_tool was hardcoded) and where CMAKE_INSTALL_NAME_TOOL isn't in the cache +# and still cmake didn't fail in CMakeFindBinUtils.cmake (because it isn't rerun) +# hardcode CMAKE_INSTALL_NAME_TOOL here to install_name_tool, so it behaves as it did before, Alex +if(NOT DEFINED CMAKE_INSTALL_NAME_TOOL) + find_program(CMAKE_INSTALL_NAME_TOOL install_name_tool) +endif(NOT DEFINED CMAKE_INSTALL_NAME_TOOL) + +# Setup iOS deployment target +set(IOS_DEPLOYMENT_TARGET ${IOS_DEPLOYMENT_TARGET} CACHE STRING "Minimum iOS version") + +# Setup iOS developer location unless specified manually with CMAKE_IOS_DEVELOPER_ROOT +# Note Xcode 4.3 changed the installation location, choose the most recent one available +exec_program(/usr/bin/xcode-select ARGS -print-path OUTPUT_VARIABLE CMAKE_XCODE_DEVELOPER_DIR) +set(XCODE_POST_43_ROOT "${CMAKE_XCODE_DEVELOPER_DIR}/Platforms/${IOS_PLATFORM_LOCATION}/Developer") +set(XCODE_PRE_43_ROOT "/Developer/Platforms/${IOS_PLATFORM_LOCATION}/Developer") +if(NOT DEFINED CMAKE_IOS_DEVELOPER_ROOT) + if(EXISTS ${XCODE_POST_43_ROOT}) + set(CMAKE_IOS_DEVELOPER_ROOT ${XCODE_POST_43_ROOT}) + elseif(EXISTS ${XCODE_PRE_43_ROOT}) + set(CMAKE_IOS_DEVELOPER_ROOT ${XCODE_PRE_43_ROOT}) + endif(EXISTS ${XCODE_POST_43_ROOT}) +endif(NOT DEFINED CMAKE_IOS_DEVELOPER_ROOT) +set(CMAKE_IOS_DEVELOPER_ROOT ${CMAKE_IOS_DEVELOPER_ROOT} CACHE PATH "Location of iOS Platform") + +# Find and use the most recent iOS sdk unless specified manually with CMAKE_IOS_SDK_ROOT +if(NOT DEFINED CMAKE_IOS_SDK_ROOT) + file(GLOB _CMAKE_IOS_SDKS "${CMAKE_IOS_DEVELOPER_ROOT}/SDKs/*") + if(_CMAKE_IOS_SDKS) + list(SORT _CMAKE_IOS_SDKS) + list(REVERSE _CMAKE_IOS_SDKS) + list(GET _CMAKE_IOS_SDKS 0 CMAKE_IOS_SDK_ROOT) + else(_CMAKE_IOS_SDKS) + message(FATAL_ERROR "No iOS SDK's found in default search path ${CMAKE_IOS_DEVELOPER_ROOT}. Manually set CMAKE_IOS_SDK_ROOT or install the iOS SDK.") + endif(_CMAKE_IOS_SDKS) + message(STATUS "Toolchain using default iOS SDK: ${CMAKE_IOS_SDK_ROOT}") +endif(NOT DEFINED CMAKE_IOS_SDK_ROOT) +set(CMAKE_IOS_SDK_ROOT ${CMAKE_IOS_SDK_ROOT} CACHE PATH "Location of the selected iOS SDK") + +# Set the sysroot default to the most recent SDK +set(CMAKE_OSX_SYSROOT ${CMAKE_IOS_SDK_ROOT} CACHE PATH "Sysroot used for iOS support") + +# set the architecture for iOS +if(IOS_PLATFORM STREQUAL "OS") + set(DEFAULT_IOS_ARCH "arm64") +elseif(IOS_PLATFORM STREQUAL "SIMULATOR") + set(DEFAULT_IOS_ARCH "x86_64") +elseif(IOS_PLATFORM STREQUAL "WATCHOS") + set(DEFAULT_IOS_ARCH "armv7k;arm64_32") +endif() + +set(IOS_ARCH ${DEFAULT_IOS_ARCH} CACHE STRING "Build architecture for iOS") +set(CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE STRING "Build architecture for iOS") + +# Set the find root to the iOS developer roots and to user defined paths +set(CMAKE_FIND_ROOT_PATH ${CMAKE_IOS_DEVELOPER_ROOT} ${CMAKE_IOS_SDK_ROOT} ${CMAKE_PREFIX_PATH} CACHE STRING "iOS find search path root") + +# default to searching for frameworks first +set(CMAKE_FIND_FRAMEWORK FIRST) + +# set up the default search directories for frameworks +set(CMAKE_SYSTEM_FRAMEWORK_PATH + ${CMAKE_IOS_SDK_ROOT}/System/Library/Frameworks + ${CMAKE_IOS_SDK_ROOT}/System/Library/PrivateFrameworks + ${CMAKE_IOS_SDK_ROOT}/Developer/Library/Frameworks +) + +# only search the iOS sdks, not the remainder of the host filesystem +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) + +# This little macro lets you set any XCode specific property +macro(set_xcode_property TARGET XCODE_PROPERTY XCODE_VALUE) + set_property(TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY} ${XCODE_VALUE}) +endmacro(set_xcode_property) + +# This macro lets you find executable programs on the host system +macro(find_host_package) + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER) + set(IOS FALSE) + + find_package(${ARGN}) + + set(IOS TRUE) + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) +endmacro(find_host_package) diff --git a/docs/Makefile b/docs/Makefile index 2ca4b0d71a2..1cacf08002f 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -2,7 +2,7 @@ # # You can set these variables from the command line. -SPHINXOPTS = +SPHINXOPTS = -W # turn warnings into errors SPHINXBUILD = sphinx-build SPHINXPROJ = torchvision SOURCEDIR = source diff --git a/docs/requirements.txt b/docs/requirements.txt index 014f642d0eb..f649853cd03 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,3 +1,3 @@ sphinx==1.7.3 sphinxcontrib-googleanalytics --e git://github.com/snide/sphinx_rtd_theme.git#egg=sphinx_rtd_theme +-e git+git://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme diff --git a/docs/source/_templates/layout.html b/docs/source/_templates/layout.html new file mode 100644 index 00000000000..aaa15d56e02 --- /dev/null +++ b/docs/source/_templates/layout.html @@ -0,0 +1,8 @@ +{% extends "!layout.html" %} + +{% block sidebartitle %} + + {% include "searchbox.html" %} +{% endblock %} diff --git a/docs/source/conf.py b/docs/source/conf.py index 3c277168a70..606bc34f841 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -22,9 +22,24 @@ # sys.path.insert(0, os.path.abspath('.')) import torch import torchvision -import sphinx_rtd_theme +import pytorch_sphinx_theme +from sphinxcontrib import googleanalytics +# Wrap sphinxcontrib-googleanalytics setup() function to avoid a Sphinx warning: +# "WARNING: extension ‘sphinxcontrib.googleanalytics’ returned an unsupported +# object from its setup() function; it should return None or a metadata +# dictionary" +_googleanalytics_setup_original = googleanalytics.setup + + +def _googleanalytics_setup_wrapper(app): + _googleanalytics_setup_original(app) + return {"version": "0.1"} + + +googleanalytics.setup = _googleanalytics_setup_wrapper + # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. @@ -48,6 +63,8 @@ ] napoleon_use_ivar = True +napoleon_numpy_docstring = False +napoleon_google_docstring = True googleanalytics_id = 'UA-90545585-1' googleanalytics_enabled = True @@ -66,7 +83,7 @@ # General information about the project. project = 'Torchvision' -copyright = '2017, Torch Contributors' +copyright = '2017-present, Torch Contributors' author = 'Torch Contributors' # The version info for the project you're documenting, acts as replacement for @@ -104,8 +121,8 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' -html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] +html_theme = 'pytorch_sphinx_theme' +html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()] # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the @@ -115,6 +132,8 @@ 'collapse_navigation': False, 'display_version': True, 'logo_only': True, + 'pytorch_project': 'docs', + 'navigation_with_keys': True, } html_logo = '_static/img/pytorch-logo-dark.svg' @@ -125,12 +144,12 @@ html_static_path = ['_static'] # html_style_path = 'css/pytorch_theme.css' -html_context = { - 'css_files': [ - 'https://fonts.googleapis.com/css?family=Lato', - '_static/css/pytorch_theme.css' - ], -} +# html_context = { +# 'css_files': [ +# 'https://fonts.googleapis.com/css?family=Lato', +# '_static/css/pytorch_theme.css' +# ], +# } # -- Options for HTMLHelp output ------------------------------------------ @@ -208,7 +227,7 @@ def patched_make_field(self, types, domain, items, **kw): # `kw` catches `env=None` needed for newer sphinx while maintaining # backwards compatibility when passed along further down! - # type: (list, unicode, tuple) -> nodes.field + # type: (list, unicode, tuple) -> nodes.field # noqa: F821 def handle_item(fieldarg, content): par = nodes.paragraph() par += addnodes.literal_strong('', fieldarg) # Patch: this line added diff --git a/docs/source/datasets.rst b/docs/source/datasets.rst index 040962edc6a..cb02f2bcaa3 100644 --- a/docs/source/datasets.rst +++ b/docs/source/datasets.rst @@ -4,7 +4,7 @@ torchvision.datasets All datasets are subclasses of :class:`torch.utils.data.Dataset` i.e, they have ``__getitem__`` and ``__len__`` methods implemented. Hence, they can all be passed to a :class:`torch.utils.data.DataLoader` -which can load multiple samples parallelly using ``torch.multiprocessing`` workers. +which can load multiple samples in parallel using ``torch.multiprocessing`` workers. For example: :: imagenet_data = torchvision.datasets.ImageNet('path/to/imagenet_root/') @@ -25,35 +25,42 @@ All the datasets have almost similar API. They all have two common arguments: .. currentmodule:: torchvision.datasets -MNIST -~~~~~ - -.. autoclass:: MNIST +Caltech +~~~~~~~ -Fashion-MNIST -~~~~~~~~~~~~~ +.. autoclass:: Caltech101 + :members: __getitem__ + :special-members: -.. autoclass:: FashionMNIST +.. autoclass:: Caltech256 + :members: __getitem__ + :special-members: -KMNIST -~~~~~~~~~~~~~ +CelebA +~~~~~~ -.. autoclass:: KMNIST +.. autoclass:: CelebA + :members: __getitem__ + :special-members: -EMNIST -~~~~~~ +CIFAR +~~~~~ -.. autoclass:: EMNIST +.. autoclass:: CIFAR10 + :members: __getitem__ + :special-members: -QMNIST -~~~~~~ +.. autoclass:: CIFAR100 -.. autoclass:: QMNIST +Cityscapes +~~~~~~~~~~ -FakeData -~~~~~~~~ +.. note :: + Requires Cityscape to be downloaded. -.. autoclass:: FakeData +.. autoclass:: Cityscapes + :members: __getitem__ + :special-members: COCO ~~~~ @@ -79,28 +86,53 @@ Detection :members: __getitem__ :special-members: -LSUN -~~~~ +DatasetFolder +~~~~~~~~~~~~~ -.. autoclass:: LSUN +.. autoclass:: DatasetFolder :members: __getitem__ :special-members: -ImageFolder -~~~~~~~~~~~ -.. autoclass:: ImageFolder +EMNIST +~~~~~~ + +.. autoclass:: EMNIST + +FakeData +~~~~~~~~ + +.. autoclass:: FakeData + +Fashion-MNIST +~~~~~~~~~~~~~ + +.. autoclass:: FashionMNIST + +Flickr +~~~~~~ + +.. autoclass:: Flickr8k :members: __getitem__ :special-members: -DatasetFolder -~~~~~~~~~~~~~ +.. autoclass:: Flickr30k + :members: __getitem__ + :special-members: -.. autoclass:: DatasetFolder +HMDB51 +~~~~~~~ + +.. autoclass:: HMDB51 :members: __getitem__ :special-members: +ImageFolder +~~~~~~~~~~~ +.. autoclass:: ImageFolder + :members: __getitem__ + :special-members: ImageNet ~~~~~~~~~~~ @@ -110,87 +142,100 @@ ImageNet .. note :: This requires `scipy` to be installed +Kinetics-400 +~~~~~~~~~~~~ -CIFAR -~~~~~ +.. autoclass:: Kinetics400 + :members: __getitem__ + :special-members: -.. autoclass:: CIFAR10 +KITTI +~~~~~~~~~ + +.. autoclass:: Kitti :members: __getitem__ :special-members: -.. autoclass:: CIFAR100 +KMNIST +~~~~~~~~~~~~~ -STL10 -~~~~~ +.. autoclass:: KMNIST +LSUN +~~~~ -.. autoclass:: STL10 +.. autoclass:: LSUN :members: __getitem__ :special-members: -SVHN +MNIST ~~~~~ +.. autoclass:: MNIST -.. autoclass:: SVHN - :members: __getitem__ - :special-members: +Omniglot +~~~~~~~~ + +.. autoclass:: Omniglot PhotoTour ~~~~~~~~~ - .. autoclass:: PhotoTour :members: __getitem__ :special-members: -SBU -~~~ - +Places365 +~~~~~~~~~ -.. autoclass:: SBU +.. autoclass:: Places365 :members: __getitem__ :special-members: -Flickr +QMNIST ~~~~~~ +.. autoclass:: QMNIST + +SBD +~~~~~~ -.. autoclass:: Flickr8k +.. autoclass:: SBDataset :members: __getitem__ :special-members: -.. autoclass:: Flickr30k +SBU +~~~ + +.. autoclass:: SBU :members: __getitem__ :special-members: -VOC -~~~~~~ - +SEMEION +~~~~~~~ -.. autoclass:: VOCSegmentation +.. autoclass:: SEMEION :members: __getitem__ :special-members: -.. autoclass:: VOCDetection +STL10 +~~~~~ + +.. autoclass:: STL10 :members: __getitem__ :special-members: -Cityscapes -~~~~~~~~~~ - -.. note :: - Requires Cityscape to be downloaded. +SVHN +~~~~~ -.. autoclass:: Cityscapes +.. autoclass:: SVHN :members: __getitem__ :special-members: -SBD -~~~~~~ - +UCF101 +~~~~~~~ -.. autoclass:: SBDataset +.. autoclass:: UCF101 :members: __getitem__ :special-members: @@ -201,26 +246,20 @@ USPS :members: __getitem__ :special-members: +VOC +~~~~~~ -Kinetics-400 -~~~~~~~~~~~~ - -.. autoclass:: Kinetics400 +.. autoclass:: VOCSegmentation :members: __getitem__ :special-members: - -HMDB51 -~~~~~~~ - -.. autoclass:: HMDB51 +.. autoclass:: VOCDetection :members: __getitem__ :special-members: +WIDERFace +~~~~~~~~~ -UCF101 -~~~~~~~ - -.. autoclass:: UCF101 +.. autoclass:: WIDERFace :members: __getitem__ :special-members: diff --git a/docs/source/index.rst b/docs/source/index.rst index 9de82b6e7fc..d4aefafed1d 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,5 +1,28 @@ torchvision =========== +This library is part of the `PyTorch +`_ project. PyTorch is an open source +machine learning framework. + +Features described in this documentation are classified by release status: + + *Stable:* These features will be maintained long-term and there should generally + be no major performance limitations or gaps in documentation. + We also expect to maintain backwards compatibility (although + breaking changes can happen and notice will be given one release ahead + of time). + + *Beta:* Features are tagged as Beta because the API may change based on + user feedback, because the performance needs to improve, or because + coverage across operators is not yet complete. For Beta features, we are + committing to seeing the feature through to the Stable classification. + We are not, however, committing to backwards compatibility. + + *Prototype:* These features are typically not available as part of + binary distributions like PyPI or Conda, except sometimes behind run-time + flags, and are at an early stage for feedback and testing. + + The :mod:`torchvision` package consists of popular datasets, model architectures, and common image transformations for computer vision. @@ -17,3 +40,15 @@ architectures, and common image transformations for computer vision. .. automodule:: torchvision :members: + +.. toctree:: + :maxdepth: 1 + :caption: PyTorch Libraries + + PyTorch + torchaudio + torchtext + torchvision + TorchElastic + TorchServe + PyTorch on XLA Devices diff --git a/docs/source/io.rst b/docs/source/io.rst index e7aeedc0716..e85951e719a 100644 --- a/docs/source/io.rst +++ b/docs/source/io.rst @@ -4,7 +4,8 @@ torchvision.io .. currentmodule:: torchvision.io The :mod:`torchvision.io` package provides functions for performing IO -operations. They are currently specific to reading and writing video. +operations. They are currently specific to reading and writing video and +images. Video ----- @@ -14,3 +15,58 @@ Video .. autofunction:: read_video_timestamps .. autofunction:: write_video + + +Fine-grained video API +---------------------- + +In addition to the :mod:`read_video` function, we provide a high-performance +lower-level API for more fine-grained control compared to the :mod:`read_video` function. +It does all this whilst fully supporting torchscript. + +.. autoclass:: VideoReader + :members: __next__, get_metadata, set_current_stream, seek + + +Example of inspecting a video: + +.. code:: python + + import torchvision + video_path = "path to a test video" + # Constructor allocates memory and a threaded decoder + # instance per video. At the moment it takes two arguments: + # path to the video file, and a wanted stream. + reader = torchvision.io.VideoReader(video_path, "video") + + # The information about the video can be retrieved using the + # `get_metadata()` method. It returns a dictionary for every stream, with + # duration and other relevant metadata (often frame rate) + reader_md = reader.get_metadata() + + # metadata is structured as a dict of dicts with following structure + # {"stream_type": {"attribute": [attribute per stream]}} + # + # following would print out the list of frame rates for every present video stream + print(reader_md["video"]["fps"]) + + # we explicitly select the stream we would like to operate on. In + # the constructor we select a default video stream, but + # in practice, we can set whichever stream we would like + video.set_current_stream("video:0") + + +Image +----- + +.. autofunction:: read_image + +.. autofunction:: decode_image + +.. autofunction:: encode_jpeg + +.. autofunction:: write_jpeg + +.. autofunction:: encode_png + +.. autofunction:: write_png diff --git a/docs/source/models.rst b/docs/source/models.rst index e1a141092dc..09ec450574b 100644 --- a/docs/source/models.rst +++ b/docs/source/models.rst @@ -22,7 +22,8 @@ architectures for image classification: - `Inception`_ v3 - `GoogLeNet`_ - `ShuffleNet`_ v2 -- `MobileNet`_ v2 +- `MobileNetV2`_ +- `MobileNetV3`_ - `ResNeXt`_ - `Wide ResNet`_ - `MNASNet`_ @@ -40,7 +41,9 @@ You can construct a model with random weights by calling its constructor: inception = models.inception_v3() googlenet = models.googlenet() shufflenet = models.shufflenet_v2_x1_0() - mobilenet = models.mobilenet_v2() + mobilenet_v2 = models.mobilenet_v2() + mobilenet_v3_large = models.mobilenet_v3_large() + mobilenet_v3_small = models.mobilenet_v3_small() resnext50_32x4d = models.resnext50_32x4d() wide_resnet50_2 = models.wide_resnet50_2() mnasnet = models.mnasnet1_0() @@ -59,7 +62,9 @@ These can be constructed by passing ``pretrained=True``: inception = models.inception_v3(pretrained=True) googlenet = models.googlenet(pretrained=True) shufflenet = models.shufflenet_v2_x1_0(pretrained=True) - mobilenet = models.mobilenet_v2(pretrained=True) + mobilenet_v2 = models.mobilenet_v2(pretrained=True) + mobilenet_v3_large = models.mobilenet_v3_large(pretrained=True) + mobilenet_v3_small = models.mobilenet_v3_small(pretrained=True) resnext50_32x4d = models.resnext50_32x4d(pretrained=True) wide_resnet50_2 = models.wide_resnet50_2(pretrained=True) mnasnet = models.mnasnet1_0(pretrained=True) @@ -71,7 +76,7 @@ This directory can be set using the `TORCH_MODEL_ZOO` environment variable. See Some models use modules which have different training and evaluation behavior, such as batch normalization. To switch between these modes, use ``model.train()`` or ``model.eval()`` as appropriate. See -:meth:`~torch.nn.Module.train` or :meth:`~torch.nn.Module.eval` for details. +:meth:`~torch.nn.Module.train` or :meth:`~torch.nn.Module.eval` for details. All pre-trained models expect input images normalized in the same way, i.e. mini-batches of 3-channel RGB images of shape (3 x H x W), @@ -86,40 +91,66 @@ You can use the following transform to normalize:: An example of such normalization can be found in the imagenet example `here `_ +The process for obtaining the values of `mean` and `std` is roughly equivalent +to:: + + import torch + from torchvision import datasets, transforms as T + + transform = T.Compose([T.Resize(256), T.CenterCrop(224), T.ToTensor()]) + dataset = datasets.ImageNet(".", split="train", transform=transform) + + means = [] + stds = [] + for img in subset(dataset): + means.append(torch.mean(img)) + stds.append(torch.std(img)) + + mean = torch.mean(torch.tensor(means)) + std = torch.mean(torch.tensor(stds)) + +Unfortunately, the concrete `subset` that was used is lost. For more +information see `this discussion `_ +or `these experiments `_. + ImageNet 1-crop error rates (224x224) ================================ ============= ============= -Network Top-1 error Top-5 error +Model Acc@1 Acc@5 ================================ ============= ============= -AlexNet 43.45 20.91 -VGG-11 30.98 11.37 -VGG-13 30.07 10.75 -VGG-16 28.41 9.62 -VGG-19 27.62 9.12 -VGG-11 with batch normalization 29.62 10.19 -VGG-13 with batch normalization 28.45 9.63 -VGG-16 with batch normalization 26.63 8.50 -VGG-19 with batch normalization 25.76 8.15 -ResNet-18 30.24 10.92 -ResNet-34 26.70 8.58 -ResNet-50 23.85 7.13 -ResNet-101 22.63 6.44 -ResNet-152 21.69 5.94 -SqueezeNet 1.0 41.90 19.58 -SqueezeNet 1.1 41.81 19.38 -Densenet-121 25.35 7.83 -Densenet-169 24.00 7.00 -Densenet-201 22.80 6.43 -Densenet-161 22.35 6.20 -Inception v3 22.55 6.44 -GoogleNet 30.22 10.47 -ShuffleNet V2 30.64 11.68 -MobileNet V2 28.12 9.71 -ResNeXt-50-32x4d 22.38 6.30 -ResNeXt-101-32x8d 20.69 5.47 -Wide ResNet-50-2 21.49 5.91 -Wide ResNet-101-2 21.16 5.72 -MNASNet 1.0 26.49 8.456 +AlexNet 56.522 79.066 +VGG-11 69.020 88.628 +VGG-13 69.928 89.246 +VGG-16 71.592 90.382 +VGG-19 72.376 90.876 +VGG-11 with batch normalization 70.370 89.810 +VGG-13 with batch normalization 71.586 90.374 +VGG-16 with batch normalization 73.360 91.516 +VGG-19 with batch normalization 74.218 91.842 +ResNet-18 69.758 89.078 +ResNet-34 73.314 91.420 +ResNet-50 76.130 92.862 +ResNet-101 77.374 93.546 +ResNet-152 78.312 94.046 +SqueezeNet 1.0 58.092 80.420 +SqueezeNet 1.1 58.178 80.624 +Densenet-121 74.434 91.972 +Densenet-169 75.600 92.806 +Densenet-201 76.896 93.370 +Densenet-161 77.138 93.560 +Inception v3 77.294 93.450 +GoogleNet 69.778 89.530 +ShuffleNet V2 x1.0 69.362 88.316 +ShuffleNet V2 x0.5 60.552 81.746 +MobileNet V2 71.878 90.286 +MobileNet V3 Large 74.042 91.340 +MobileNet V3 Small 67.668 87.402 +ResNeXt-50-32x4d 77.618 93.698 +ResNeXt-101-32x8d 79.312 94.526 +Wide ResNet-50-2 78.468 94.086 +Wide ResNet-101-2 78.848 94.284 +MNASNet 1.0 73.456 91.510 +MNASNet 0.5 67.734 87.490 ================================ ============= ============= @@ -131,7 +162,8 @@ MNASNet 1.0 26.49 8.456 .. _Inception: https://arxiv.org/abs/1512.00567 .. _GoogLeNet: https://arxiv.org/abs/1409.4842 .. _ShuffleNet: https://arxiv.org/abs/1807.11164 -.. _MobileNet: https://arxiv.org/abs/1801.04381 +.. _MobileNetV2: https://arxiv.org/abs/1801.04381 +.. _MobileNetV3: https://arxiv.org/abs/1905.02244 .. _ResNeXt: https://arxiv.org/abs/1611.05431 .. _MNASNet: https://arxiv.org/abs/1807.11626 @@ -183,11 +215,19 @@ Inception v3 .. autofunction:: inception_v3 +.. note :: + This requires `scipy` to be installed + + GoogLeNet ------------ .. autofunction:: googlenet +.. note :: + This requires `scipy` to be installed + + ShuffleNet v2 ------------- @@ -201,6 +241,12 @@ MobileNet v2 .. autofunction:: mobilenet_v2 +MobileNet v3 +------------- + +.. autofunction:: mobilenet_v3_large +.. autofunction:: mobilenet_v3_small + ResNext ------- @@ -221,6 +267,52 @@ MNASNet .. autofunction:: mnasnet1_0 .. autofunction:: mnasnet1_3 +Quantized Models +---------------- + +The following architectures provide support for INT8 quantized models. You can get +a model with random weights by calling its constructor: + +.. code:: python + + import torchvision.models as models + googlenet = models.quantization.googlenet() + inception_v3 = models.quantization.inception_v3() + mobilenet_v2 = models.quantization.mobilenet_v2() + mobilenet_v3_large = models.quantization.mobilenet_v3_large() + resnet18 = models.quantization.resnet18() + resnet50 = models.quantization.resnet50() + resnext101_32x8d = models.quantization.resnext101_32x8d() + shufflenet_v2_x0_5 = models.quantization.shufflenet_v2_x0_5() + shufflenet_v2_x1_0 = models.quantization.shufflenet_v2_x1_0() + shufflenet_v2_x1_5 = models.quantization.shufflenet_v2_x1_5() + shufflenet_v2_x2_0 = models.quantization.shufflenet_v2_x2_0() + +Obtaining a pre-trained quantized model can be done with a few lines of code: + +.. code:: python + + import torchvision.models as models + model = models.quantization.mobilenet_v2(pretrained=True, quantize=True) + model.eval() + # run the model with quantized inputs and weights + out = model(torch.rand(1, 3, 224, 224)) + +We provide pre-trained quantized weights for the following models: + +================================ ============= ============= +Model Acc@1 Acc@5 +================================ ============= ============= +MobileNet V2 71.658 90.150 +MobileNet V3 Large 73.004 90.858 +ShuffleNet V2 68.360 87.582 +ResNet 18 69.494 88.882 +ResNet 50 75.920 92.814 +ResNext 101 32x8d 78.986 94.480 +Inception V3 77.176 93.354 +GoogleNet 69.826 89.404 +================================ ============= ============= + Semantic Segmentation ===================== @@ -228,8 +320,9 @@ Semantic Segmentation The models subpackage contains definitions for the following model architectures for semantic segmentation: -- `FCN ResNet101 `_ -- `DeepLabV3 ResNet101 `_ +- `FCN ResNet50, ResNet101 `_ +- `DeepLabV3 ResNet50, ResNet101, MobileNetV3-Large `_ +- `LR-ASPP MobileNetV3-Large `_ As with image classification models, all pre-trained models expect input images normalized in the same way. The images have to be loaded in to a range of ``[0, 1]`` and then normalized using @@ -252,8 +345,12 @@ The accuracies of the pre-trained models evaluated on COCO val2017 are as follow ================================ ============= ==================== Network mean IoU global pixelwise acc ================================ ============= ==================== +FCN ResNet50 60.5 91.4 FCN ResNet101 63.7 91.9 +DeepLabV3 ResNet50 66.4 92.4 DeepLabV3 ResNet101 67.4 92.4 +DeepLabV3 MobileNetV3-Large 60.3 91.2 +LR-ASPP MobileNetV3-Large 57.9 91.2 ================================ ============= ==================== @@ -269,6 +366,13 @@ DeepLabV3 .. autofunction:: torchvision.models.segmentation.deeplabv3_resnet50 .. autofunction:: torchvision.models.segmentation.deeplabv3_resnet101 +.. autofunction:: torchvision.models.segmentation.deeplabv3_mobilenet_v3_large + + +LR-ASPP +------- + +.. autofunction:: torchvision.models.segmentation.lraspp_mobilenet_v3_large Object Detection, Instance Segmentation and Person Keypoint Detection @@ -314,12 +418,15 @@ models return the predictions of the following classes: Here are the summary of the accuracies for the models trained on the instances set of COCO train2017 and evaluated on COCO val2017. -================================ ======= ======== =========== -Network box AP mask AP keypoint AP -================================ ======= ======== =========== -Faster R-CNN ResNet-50 FPN 37.0 - - -Mask R-CNN ResNet-50 FPN 37.9 34.6 - -================================ ======= ======== =========== +====================================== ======= ======== =========== +Network box AP mask AP keypoint AP +====================================== ======= ======== =========== +Faster R-CNN ResNet-50 FPN 37.0 - - +Faster R-CNN MobileNetV3-Large FPN 32.8 - - +Faster R-CNN MobileNetV3-Large 320 FPN 22.8 - - +RetinaNet ResNet-50 FPN 36.4 - - +Mask R-CNN ResNet-50 FPN 37.9 34.6 - +====================================== ======= ======== =========== For person keypoint detection, the accuracies for the pre-trained models are as follows @@ -369,19 +476,30 @@ For test time, we report the time for the model evaluation and postprocessing (including mask pasting in image), but not the time for computing the precision-recall. -============================== =================== ================== =========== -Network train time (s / it) test time (s / it) memory (GB) -============================== =================== ================== =========== -Faster R-CNN ResNet-50 FPN 0.2288 0.0590 5.2 -Mask R-CNN ResNet-50 FPN 0.2728 0.0903 5.4 -Keypoint R-CNN ResNet-50 FPN 0.3789 0.1242 6.8 -============================== =================== ================== =========== +====================================== =================== ================== =========== +Network train time (s / it) test time (s / it) memory (GB) +====================================== =================== ================== =========== +Faster R-CNN ResNet-50 FPN 0.2288 0.0590 5.2 +Faster R-CNN MobileNetV3-Large FPN 0.1020 0.0415 1.0 +Faster R-CNN MobileNetV3-Large 320 FPN 0.0978 0.0376 0.6 +RetinaNet ResNet-50 FPN 0.2514 0.0939 4.1 +Mask R-CNN ResNet-50 FPN 0.2728 0.0903 5.4 +Keypoint R-CNN ResNet-50 FPN 0.3789 0.1242 6.8 +====================================== =================== ================== =========== Faster R-CNN ------------ .. autofunction:: torchvision.models.detection.fasterrcnn_resnet50_fpn +.. autofunction:: torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn +.. autofunction:: torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn + + +RetinaNet +------------ + +.. autofunction:: torchvision.models.detection.retinanet_resnet50_fpn Mask R-CNN diff --git a/docs/source/ops.rst b/docs/source/ops.rst index ec87d02556e..cdebe9721c3 100644 --- a/docs/source/ops.rst +++ b/docs/source/ops.rst @@ -6,12 +6,28 @@ torchvision.ops :mod:`torchvision.ops` implements operators that are specific for Computer Vision. .. note:: - Those operators currently do not support TorchScript. + All operators have native support for TorchScript. .. autofunction:: nms +.. autofunction:: batched_nms +.. autofunction:: remove_small_boxes +.. autofunction:: clip_boxes_to_image +.. autofunction:: box_convert +.. autofunction:: box_area +.. autofunction:: box_iou +.. autofunction:: generalized_box_iou .. autofunction:: roi_align +.. autofunction:: ps_roi_align .. autofunction:: roi_pool +.. autofunction:: ps_roi_pool +.. autofunction:: deform_conv2d +.. autofunction:: sigmoid_focal_loss .. autoclass:: RoIAlign +.. autoclass:: PSRoIAlign .. autoclass:: RoIPool +.. autoclass:: PSRoIPool +.. autoclass:: DeformConv2d +.. autoclass:: MultiScaleRoIAlign +.. autoclass:: FeaturePyramidNetwork diff --git a/docs/source/transforms.rst b/docs/source/transforms.rst index 2e0c6cefb8d..21e2c152626 100644 --- a/docs/source/transforms.rst +++ b/docs/source/transforms.rst @@ -4,83 +4,199 @@ torchvision.transforms .. currentmodule:: torchvision.transforms Transforms are common image transformations. They can be chained together using :class:`Compose`. -Additionally, there is the :mod:`torchvision.transforms.functional` module. -Functional transforms give fine-grained control over the transformations. +Most transform classes have a function equivalent: :ref:`functional +transforms ` give fine-grained control over the +transformations. This is useful if you have to build a more complex transformation pipeline (e.g. in the case of segmentation tasks). +Most transformations accept both `PIL `_ +images and tensor images, although some transformations are :ref:`PIL-only +` and some are :ref:`tensor-only +`. The :ref:`conversion_transforms` may be used to +convert to and from PIL images. + +The transformations that accept tensor images also accept batches of tensor +images. A Tensor Image is a tensor with ``(C, H, W)`` shape, where ``C`` is a +number of channels, ``H`` and ``W`` are image height and width. A batch of +Tensor Images is a tensor of ``(B, C, H, W)`` shape, where ``B`` is a number +of images in the batch. + +The expected range of the values of a tensor image is implicitely defined by +the tensor dtype. Tensor images with a float dtype are expected to have +values in ``[0, 1)``. Tensor images with an integer dtype are expected to +have values in ``[0, MAX_DTYPE]`` where ``MAX_DTYPE`` is the largest value +that can be represented in that dtype. + +Randomized transformations will apply the same transformation to all the +images of a given batch, but they will produce different transformations +across calls. For reproducible transformations across calls, you may use +:ref:`functional transforms `. + +.. warning:: + + Since v0.8.0 all random transformations are using torch default random generator to sample random parameters. + It is a backward compatibility breaking change and user should set the random state as following: + + .. code:: python + + # Previous versions + # import random + # random.seed(12) + + # Now + import torch + torch.manual_seed(17) + + Please, keep in mind that the same seed for torch random generator and Python random generator will not + produce the same results. + + +Scriptable transforms +--------------------- + +In order to script the transformations, please use ``torch.nn.Sequential`` instead of :class:`Compose`. + +.. code:: python + + transforms = torch.nn.Sequential( + transforms.CenterCrop(10), + transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + ) + scripted_transforms = torch.jit.script(transforms) + +Make sure to use only scriptable transformations, i.e. that work with ``torch.Tensor`` and does not require +`lambda` functions or ``PIL.Image``. + +For any custom transformations to be used with ``torch.jit.script``, they should be derived from ``torch.nn.Module``. + + +Compositions of transforms +-------------------------- + .. autoclass:: Compose -Transforms on PIL Image ------------------------ + +Transforms on PIL Image and torch.\*Tensor +------------------------------------------ .. autoclass:: CenterCrop + :members: .. autoclass:: ColorJitter + :members: .. autoclass:: FiveCrop + :members: .. autoclass:: Grayscale + :members: .. autoclass:: Pad + :members: .. autoclass:: RandomAffine + :members: .. autoclass:: RandomApply -.. autoclass:: RandomChoice - .. autoclass:: RandomCrop + :members: .. autoclass:: RandomGrayscale + :members: .. autoclass:: RandomHorizontalFlip - -.. autoclass:: RandomOrder + :members: .. autoclass:: RandomPerspective + :members: .. autoclass:: RandomResizedCrop + :members: .. autoclass:: RandomRotation + :members: .. autoclass:: RandomSizedCrop + :members: .. autoclass:: RandomVerticalFlip + :members: .. autoclass:: Resize + :members: .. autoclass:: Scale + :members: .. autoclass:: TenCrop + :members: -Transforms on torch.\*Tensor +.. autoclass:: GaussianBlur + :members: + +.. _transforms_pil_only: + +Transforms on PIL Image only ---------------------------- +.. autoclass:: RandomChoice + +.. autoclass:: RandomOrder + +.. _transforms_tensor_only: + +Transforms on torch.\*Tensor only +--------------------------------- + .. autoclass:: LinearTransformation + :members: .. autoclass:: Normalize - :members: __call__ - :special-members: + :members: .. autoclass:: RandomErasing + :members: + +.. autoclass:: ConvertImageDtype + +.. _conversion_transforms: Conversion Transforms --------------------- .. autoclass:: ToPILImage - :members: __call__ - :special-members: + :members: .. autoclass:: ToTensor - :members: __call__ - :special-members: + :members: + Generic Transforms ------------------ .. autoclass:: Lambda + :members: + + +AutoAugment Transforms +---------------------- + +`AutoAugment `_ is a common Data Augmentation technique that can improve the accuracy of Image Classification models. +Though the data augmentation policies are directly linked to their trained dataset, empirical studies show that +ImageNet policies provide significant improvements when applied to other datasets. +In TorchVision we implemented 3 policies learned on the following datasets: ImageNet, CIFAR10 and SVHN. +The new transform can be used standalone or mixed-and-matched with existing transforms: + +.. autoclass:: AutoAugmentPolicy + :members: + +.. autoclass:: AutoAugment + :members: + +.. _functional_transforms: Functional Transforms --------------------- @@ -88,7 +204,8 @@ Functional Transforms Functional transforms give you fine-grained control of the transformation pipeline. As opposed to the transformations above, functional transforms don't contain a random number generator for their parameters. -That means you have to specify/generate all parameters, but you can reuse the functional transform. +That means you have to specify/generate all parameters, but the functional transform will give you +reproducible results across calls. Example: you can apply a functional transform with the same parameters to multiple images like this: diff --git a/docs/source/utils.rst b/docs/source/utils.rst index ad2fc91c897..acaf785d817 100644 --- a/docs/source/utils.rst +++ b/docs/source/utils.rst @@ -7,3 +7,6 @@ torchvision.utils .. autofunction:: save_image +.. autofunction:: draw_bounding_boxes + +.. autofunction:: draw_segmentation_masks diff --git a/examples/cpp/hello_world/CMakeLists.txt b/examples/cpp/hello_world/CMakeLists.txt new file mode 100644 index 00000000000..3244efb392b --- /dev/null +++ b/examples/cpp/hello_world/CMakeLists.txt @@ -0,0 +1,16 @@ +cmake_minimum_required(VERSION 3.10) +project(hello-world) + +# The first thing do is to tell cmake to find the TorchVision library. +# The package pulls in all the necessary torch libraries, +# so there is no need to also add `find_package(Torch)` here. +find_package(TorchVision REQUIRED) + +add_executable(hello-world main.cpp) + +# We now need to link the TorchVision library to our executable. +# We can do that by using the TorchVision::TorchVision target, +# which also adds all the necessary torch dependencies. +target_compile_features(hello-world PUBLIC cxx_range_for) +target_link_libraries(hello-world TorchVision::TorchVision) +set_property(TARGET hello-world PROPERTY CXX_STANDARD 14) diff --git a/examples/cpp/hello_world/README.rst b/examples/cpp/hello_world/README.rst new file mode 100644 index 00000000000..aa5427a6f1c --- /dev/null +++ b/examples/cpp/hello_world/README.rst @@ -0,0 +1,19 @@ +Hello World! +============ + +This is a minimal example of getting TorchVision to work in C++ with CMake. + + +In order to successfully compile this example, make sure you have both ``LibTorch`` and +``TorchVision`` installed. +Once both dependencies are sorted, we can start the CMake fun: + +1) Create a ``build`` directory inside the current one. +2) from within the ``build`` directory, run the following commands: + - | ``cmake -DCMAKE_PREFIX_PATH=";" ..`` + | where ```` and ```` are the paths to the libtorch and torchvision installations. + - ``cmake --build .`` + +| That's it! +| You should now have a ``hello-world`` executable in your ``build`` folder. + Running it will output a (fairly long) tensor of random values to your terminal. \ No newline at end of file diff --git a/examples/cpp/hello_world/main.cpp b/examples/cpp/hello_world/main.cpp new file mode 100644 index 00000000000..3a75bdec6cb --- /dev/null +++ b/examples/cpp/hello_world/main.cpp @@ -0,0 +1,25 @@ +#include +#include +#include +#include + +int main() +{ + auto model = vision::models::ResNet18(); + model->eval(); + + // Create a random input tensor and run it through the model. + auto in = torch::rand({1, 3, 10, 10}); + auto out = model->forward(in); + + std::cout << out.sizes(); + + if (torch::cuda::is_available()) { + // Move model and inputs to GPU + model->to(torch::kCUDA); + auto gpu_in = in.to(torch::kCUDA); + auto gpu_out = model->forward(gpu_in); + + std::cout << gpu_out.sizes(); + } +} diff --git a/examples/python/README.md b/examples/python/README.md new file mode 100644 index 00000000000..1e6c66b5219 --- /dev/null +++ b/examples/python/README.md @@ -0,0 +1,22 @@ +# Python examples + +- [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pytorch/vision/blob/master/examples/python/tensor_transforms.ipynb) +[Examples of Tensor Images transformations](https://github.com/pytorch/vision/blob/master/examples/python/tensor_transforms.ipynb) +- [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pytorch/vision/blob/master/examples/python/video_api.ipynb) +[Example of VideoAPI](https://github.com/pytorch/vision/blob/master/examples/python/video_api.ipynb) +- [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pytorch/vision/blob/master/examples/python/visualization_utils.ipynb) +[Example of Visualization Utils](https://github.com/pytorch/vision/blob/master/examples/python/visualization_utils.ipynb) + + +Prior to v0.8.0, transforms in torchvision have traditionally been PIL-centric and presented multiple limitations due to +that. Now, since v0.8.0, transforms implementations are Tensor and PIL compatible and we can achieve the following new +features: +- transform multi-band torch tensor images (with more than 3-4 channels) +- torchscript transforms together with your model for deployment +- support for GPU acceleration +- batched transformation such as for videos +- read and decode data directly as torch tensor with torchscript support (for PNG and JPEG image formats) + +Furthermore, previously we used to provide a very high-level API for video decoding which left little control to the user. We're now expanding that API (and replacing it in the future) with a lower-level API that allows the user a frame-based access to a video. + +Torchvision also provides utilities to visualize results. You can make grid of images, plot bounding boxes as well as segmentation masks. Thse utilities work standalone as well as with torchvision models for detection and segmentation. diff --git a/examples/python/tensor_transforms.ipynb b/examples/python/tensor_transforms.ipynb new file mode 100644 index 00000000000..7bb5741947c --- /dev/null +++ b/examples/python/tensor_transforms.ipynb @@ -0,0 +1,388 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "vjAC2mZnb4nz" + }, + "source": [ + "# Image transformations\n", + "\n", + "This notebook shows new features of torchvision image transformations. \n", + "\n", + "Prior to v0.8.0, transforms in torchvision have traditionally been PIL-centric and presented multiple limitations due to that. Now, since v0.8.0, transforms implementations are Tensor and PIL compatible and we can achieve the following new \n", + "features:\n", + "- transform multi-band torch tensor images (with more than 3-4 channels) \n", + "- torchscript transforms together with your model for deployment\n", + "- support for GPU acceleration\n", + "- batched transformation such as for videos\n", + "- read and decode data directly as torch tensor with torchscript support (for PNG and JPEG image formats)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 35 + }, + "id": "btaDWPDbgIyW", + "outputId": "8a83d408-f643-42da-d247-faf3a1bd3ae0" + }, + "outputs": [], + "source": [ + "import torch, torchvision\n", + "torch.__version__, torchvision.__version__" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9Vj9draNb4oA" + }, + "source": [ + "## Transforms on CPU/CUDA tensor images\n", + "\n", + "Let's show how to apply transformations on images opened directly as a torch tensors.\n", + "Now, torchvision provides image reading functions for PNG and JPG images with torchscript support. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Epp3hCy0b4oD" + }, + "outputs": [], + "source": [ + "from torchvision.datasets.utils import download_url\n", + "\n", + "download_url(\"https://farm1.static.flickr.com/152/434505223_8d1890e1e2.jpg\", \".\", \"test-image.jpg\")\n", + "download_url(\"https://farm3.static.flickr.com/2142/1896267403_24939864ba.jpg\", \".\", \"test-image2.jpg\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Y-m7lYDPb4oK" + }, + "outputs": [], + "source": [ + "import matplotlib.pylab as plt\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 303 + }, + "id": "5bi8Q7L3b4oc", + "outputId": "e5de5c73-e16d-4992-ebee-94c7ddf0bf54" + }, + "outputs": [], + "source": [ + "from torchvision.io.image import read_image\n", + "\n", + "tensor_image = read_image(\"test-image.jpg\")\n", + "\n", + "print(\"tensor image info: \", tensor_image.shape, tensor_image.dtype)\n", + "\n", + "plt.imshow(tensor_image.numpy().transpose((1, 2, 0)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def to_rgb_image(tensor):\n", + " \"\"\"Helper method to get RGB numpy array for plotting\"\"\"\n", + " np_img = tensor.cpu().numpy().transpose((1, 2, 0))\n", + " m1, m2 = np_img.min(axis=(0, 1)), np_img.max(axis=(0, 1))\n", + " return (255.0 * (np_img - m1) / (m2 - m1)).astype(\"uint8\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 322 + }, + "id": "PgWpjxQ3b4pF", + "outputId": "e9a138e8-b45c-4f75-d849-3b41de0e5472" + }, + "outputs": [], + "source": [ + "import torchvision.transforms as T\n", + "\n", + "# to fix random seed is now:\n", + "torch.manual_seed(12)\n", + "\n", + "transforms = T.Compose([\n", + " T.RandomCrop(224),\n", + " T.RandomHorizontalFlip(p=0.3),\n", + " T.ConvertImageDtype(torch.float),\n", + " T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])\n", + "])\n", + "\n", + "out_image = transforms(tensor_image)\n", + "print(\"output tensor image info: \", out_image.shape, out_image.dtype)\n", + "\n", + "plt.imshow(to_rgb_image(out_image))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LmYQB4cxb4pI" + }, + "source": [ + "Tensor images can be on GPU" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 322 + }, + "id": "S6syYJGEb4pN", + "outputId": "86bddb64-e648-45f2-c216-790d43cfc26d" + }, + "outputs": [], + "source": [ + "out_image = transforms(tensor_image.to(\"cuda\"))\n", + "print(\"output tensor image info: \", out_image.shape, out_image.dtype, out_image.device)\n", + "\n", + "plt.imshow(to_rgb_image(out_image))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jg9TQd7ajfyn" + }, + "source": [ + "## Scriptable transforms for easier deployment via torchscript\n", + "\n", + "Next, we show how to combine input transformations and model's forward pass and use `torch.jit.script` to obtain a single scripted module.\n", + "\n", + "**Note:** we have to use only scriptable transformations that should be derived from `torch.nn.Module`. \n", + "Since v0.8.0, all transformations are scriptable except `Compose`, `RandomChoice`, `RandomOrder`, `Lambda` and those applied on PIL images.\n", + "The transformations like `Compose` are kept for backward compatibility and can be easily replaced by existing torch modules, like `nn.Sequential`.\n", + "\n", + "Let's define a module `Predictor` that transforms input tensor and applies ImageNet pretrained resnet18 model on it." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NSDOJ3RajfvO" + }, + "outputs": [], + "source": [ + "import torch\n", + "import torch.nn as nn\n", + "import torchvision.transforms as T\n", + "from torchvision.io.image import read_image\n", + "from torchvision.models import resnet18\n", + "\n", + "\n", + "class Predictor(nn.Module):\n", + "\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.resnet18 = resnet18(pretrained=True).eval()\n", + " self.transforms = nn.Sequential(\n", + " T.Resize([256, ]), # We use single int value inside a list due to torchscript type restrictions\n", + " T.CenterCrop(224),\n", + " T.ConvertImageDtype(torch.float),\n", + " T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])\n", + " )\n", + "\n", + " def forward(self, x: torch.Tensor) -> torch.Tensor:\n", + " with torch.no_grad():\n", + " x = self.transforms(x)\n", + " y_pred = self.resnet18(x)\n", + " return y_pred.argmax(dim=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZZKDovqej5vA" + }, + "source": [ + "Now, let's define scripted and non-scripted instances of `Predictor` and apply on multiple tensor images of the same size" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GBBMSo7vjfr0" + }, + "outputs": [], + "source": [ + "from torchvision.io.image import read_image\n", + "\n", + "predictor = Predictor().to(\"cuda\")\n", + "scripted_predictor = torch.jit.script(predictor).to(\"cuda\")\n", + "\n", + "\n", + "tensor_image1 = read_image(\"test-image.jpg\")\n", + "tensor_image2 = read_image(\"test-image2.jpg\")\n", + "batch = torch.stack([tensor_image1[:, -320:, :], tensor_image2[:, -320:, :]]).to(\"cuda\")\n", + "\n", + "res1 = scripted_predictor(batch)\n", + "res2 = predictor(batch)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 501 + }, + "id": "Dmi9r_p-oKsk", + "outputId": "b9c55e7d-5db1-4975-c485-fecc4075bf47" + }, + "outputs": [], + "source": [ + "import json\n", + "from torchvision.datasets.utils import download_url\n", + "\n", + "\n", + "download_url(\"https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json\", \".\", \"imagenet_class_index.json\")\n", + "\n", + "\n", + "with open(\"imagenet_class_index.json\", \"r\") as h:\n", + " labels = json.load(h)\n", + "\n", + "\n", + "plt.figure(figsize=(12, 7))\n", + "for i, p in enumerate(res1):\n", + " plt.subplot(1, 2, i + 1)\n", + " plt.title(\"Scripted predictor:\\n{label})\".format(label=labels[str(p.item())]))\n", + " plt.imshow(batch[i, ...].cpu().numpy().transpose((1, 2, 0)))\n", + "\n", + "\n", + "plt.figure(figsize=(12, 7))\n", + "for i, p in enumerate(res2):\n", + " plt.subplot(1, 2, i + 1)\n", + " plt.title(\"Original predictor:\\n{label})\".format(label=labels[str(p.item())]))\n", + " plt.imshow(batch[i, ...].cpu().numpy().transpose((1, 2, 0)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7IYsjzpFqcK8" + }, + "source": [ + "We save and reload scripted predictor in Python or C++ and use it for inference:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 52 + }, + "id": "0kk9LLw5jfol", + "outputId": "05ea6db7-7fcf-4b74-a763-5f117c14cc00" + }, + "outputs": [], + "source": [ + "scripted_predictor.save(\"scripted_predictor.pt\")\n", + "\n", + "scripted_predictor = torch.jit.load(\"scripted_predictor.pt\")\n", + "res1 = scripted_predictor(batch)\n", + "\n", + "for i, p in enumerate(res1):\n", + " print(\"Scripted predictor: {label})\".format(label=labels[str(p.item())]))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Data reading and decoding functions also support torch script and therefore can be part of the model as well:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class AnotherPredictor(Predictor):\n", + "\n", + " def forward(self, path: str) -> int:\n", + " with torch.no_grad():\n", + " x = read_image(path).unsqueeze(0)\n", + " x = self.transforms(x)\n", + " y_pred = self.resnet18(x)\n", + " return int(y_pred.argmax(dim=1).item())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-cMwTs3Yjffy" + }, + "outputs": [], + "source": [ + "scripted_predictor2 = torch.jit.script(AnotherPredictor())\n", + "\n", + "res = scripted_predictor2(\"test-image.jpg\")\n", + "\n", + "print(\"Scripted another predictor: {label})\".format(label=labels[str(res)]))" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "torchvision_scriptable_transforms.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/python/video_api.ipynb b/examples/python/video_api.ipynb new file mode 100644 index 00000000000..724de2f0a12 --- /dev/null +++ b/examples/python/video_api.ipynb @@ -0,0 +1,772 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Welcome to torchvision's new video API\n", + "\n", + "Here, we're going to examine the capabilities of the new video API, together with the examples on how to build datasets and more. \n", + "\n", + "### Table of contents\n", + "1. Introduction: building a new video object and examining the properties\n", + "2. Building a sample `read_video` function\n", + "3. Building an example dataset (can be applied to e.g. kinetics400)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('1.8.0a0+7580962', '0.8.0a0+4db3dc6')" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import torch, torchvision\n", + "torch.__version__, torchvision.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading https://github.com/pytorch/vision/blob/master/test/assets/videos/WUzgd7C1pWA.mp4?raw=true to ./WUzgd7C1pWA.mp4\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100.4%" + ] + } + ], + "source": [ + "# download the sample video\n", + "from torchvision.datasets.utils import download_url\n", + "download_url(\"https://github.com/pytorch/vision/blob/master/test/assets/videos/WUzgd7C1pWA.mp4?raw=true\", \".\", \"WUzgd7C1pWA.mp4\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Introduction: building a new video object and examining the properties\n", + "\n", + "First we select a video to test the object out. For the sake of argument we're using one from Kinetics400 dataset. To create it, we need to define the path and the stream we want to use. See inline comments for description. " + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "import torch, torchvision\n", + "\"\"\"\n", + "chosen video statistics:\n", + "WUzgd7C1pWA.mp4\n", + " - source: kinetics-400\n", + " - video: H-264 - MPEG-4 AVC (part 10) (avc1)\n", + " - fps: 29.97\n", + " - audio: MPEG AAC audio (mp4a)\n", + " - sample rate: 48K Hz\n", + "\"\"\"\n", + "video_path = \"./WUzgd7C1pWA.mp4\"\n", + "\n", + "\"\"\"\n", + "streams are defined in a similar fashion as torch devices. We encode them as strings in a form\n", + "of `stream_type:stream_id` where stream_type is a string and stream_id a long int. \n", + "\n", + "The constructor accepts passing a stream_type only, in which case the stream is auto-discovered.\n", + "\"\"\"\n", + "stream = \"video\"\n", + "\n", + "\n", + "\n", + "video = torchvision.io.VideoReader(video_path, stream)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's get the metadata for our particular video:" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'video': {'duration': [10.9109], 'fps': [29.97002997002997]},\n", + " 'audio': {'duration': [10.9], 'framerate': [48000.0]}}" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "video.get_metadata()" + ] + }, + { + "source": [ + "Here we can see that video has two streams - a video and an audio stream. \n", + "Currently available stream types include ``['video', 'audio']``.\n", + "Each descriptor consists of two parts: stream type (e.g. 'video') and\n", + "a unique stream id (which are determined by video encoding).\n", + "In this way, if the video contaner contains multiple\n", + "streams of the same type, users can acces the one they want.\n", + "If only stream type is passed, the decoder auto-detects first stream\n", + "of that type and returns it.\n", + "\n", + "Let's read all the frames from the video stream.\n", + "By default, the return value of `next(video_reader)` is a dict containing the following fields.\n", + "\n", + "The return fields are \n", + "- `data` containing a torch.tensor\n", + "- `pts` containing a float timestamp of this particular frame. " + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "PTS for first five frames [0.0, 0.033367, 0.066733, 0.1001, 0.133467]\n", + "Total number of frames: 327\n", + "We can expect approx: 327.0\n", + "Tensor size: torch.Size([3, 256, 340])\n" + ] + } + ], + "source": [ + "# first we select the video stream \n", + "metadata = video.get_metadata()\n", + "video.set_current_stream(\"video:0\")\n", + "\n", + "frames = [] # we are going to save the frames here.\n", + "ptss = [] # pts is a presentation timestamp in seconds (float) of each frame\n", + "for frame in video:\n", + " frames.append(frame['data'])\n", + " ptss.append(frame['pts'])\n", + "\n", + "print(\"PTS for first five frames \", ptss[:5])\n", + "print(\"Total number of frames: \", len(frames))\n", + "approx_nf = metadata['video']['duration'][0] * metadata['video']['fps'][0]\n", + "print(\"We can expect approx: \", approx_nf)\n", + "print(\"Tensor size: \", frames[0].size())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that selecting zero video stream is equivalent to selecting video stream automatically. I.e. `video:0` and `video` will end up with same results in this case. \n", + "\n", + "Let's try this for audio. Note that presentation timestamps are different so aligment has to be done carefully. " + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "PTS for first five frames [0.0, 0.021332999999999998, 0.042667, 0.064, 0.08533299999999999]\n", + "Total number of frames: 511\n", + "Approx total number of datapoints we can expect: 523200.0\n", + "Read data size: 523264\n" + ] + } + ], + "source": [ + "metadata = video.get_metadata()\n", + "video.set_current_stream(\"audio\")\n", + "\n", + "frames = [] # we are going to save the frames here.\n", + "ptss = [] # pts is a presentation timestamp in seconds (float) of each frame\n", + "for frame in video:\n", + " frames.append(frame['data'])\n", + " ptss.append(frame['pts'])\n", + "\n", + "print(\"PTS for first five frames \", ptss[:5])\n", + "print(\"Total number of frames: \", len(frames))\n", + "approx_nf = metadata['audio']['duration'][0] * metadata['audio']['framerate'][0]\n", + "print(\"Approx total number of datapoints we can expect: \", approx_nf)\n", + "print(\"Read data size: \", frames[0].size(0) * len(frames))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "But what if we only want to read certain time segment of the video?\n", + "\n", + "That can be done easily using the combination of our seek function, and the fact that each call to next returns the presentation timestamp of the returned frame in seconds. Given that our implementation relies on python iterators, we can leverage `itertools` to simplify the process and make it more pythonic. \n", + "\n", + "For example, if we wanted to read ten frames from second second:" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total number of frames: 10\n" + ] + } + ], + "source": [ + "import itertools\n", + "video.set_current_stream(\"video\")\n", + "\n", + "frames = [] # we are going to save the frames here.\n", + "\n", + "# we seek into a second second of the video\n", + "# and use islice to get 10 frames since\n", + "for frame, pts in itertools.islice(video.seek(2), 10):\n", + " frames.append(frame)\n", + " \n", + "print(\"Total number of frames: \", len(frames))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Or if we wanted to read from 2nd to 5th second:" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total number of frames: 90\n", + "We can expect approx: 89.91008991008991\n", + "Tensor size: torch.Size([3, 256, 340])\n" + ] + } + ], + "source": [ + "video.set_current_stream(\"video\")\n", + "\n", + "frames = [] # we are going to save the frames here.\n", + "\n", + "# we seek into a second second of the video\n", + "video = video.seek(2)\n", + "# then we utilize the itertools takewhile to get the \n", + "# correct number of frames\n", + "for frame in itertools.takewhile(lambda x: x['pts'] <= 5, video):\n", + " frames.append(frame['data'])\n", + "\n", + "print(\"Total number of frames: \", len(frames))\n", + "approx_nf = (5-2) * video.get_metadata()['video']['fps'][0]\n", + "print(\"We can expect approx: \", approx_nf)\n", + "print(\"Tensor size: \", frames[0].size())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Building a sample `read_video` function\n", + "\n", + "We can utilize the methods above to build the read video function that follows the same API to the existing `read_video` function " + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [], + "source": [ + "def example_read_video(video_object, start=0, end=None, read_video=True, read_audio=True):\n", + "\n", + " if end is None:\n", + " end = float(\"inf\")\n", + " if end < start:\n", + " raise ValueError(\n", + " \"end time should be larger than start time, got \"\n", + " \"start time={} and end time={}\".format(s, e)\n", + " )\n", + " \n", + " video_frames = torch.empty(0)\n", + " video_pts = []\n", + " if read_video:\n", + " video_object.set_current_stream(\"video\")\n", + " frames = []\n", + " for frame in itertools.takewhile(lambda x: x['pts'] <= end, video_object.seek(start)):\n", + " frames.append(frame['data'])\n", + " video_pts.append(frame['pts'])\n", + " if len(frames) > 0:\n", + " video_frames = torch.stack(frames, 0)\n", + "\n", + " audio_frames = torch.empty(0)\n", + " audio_pts = []\n", + " if read_audio:\n", + " video_object.set_current_stream(\"audio\")\n", + " frames = []\n", + " for frame in itertools.takewhile(lambda x: x['pts'] <= end, video_object.seek(start)):\n", + " frames.append(frame['data'])\n", + " video_pts.append(frame['pts'])\n", + " if len(frames) > 0:\n", + " audio_frames = torch.cat(frames, 0)\n", + "\n", + " return video_frames, audio_frames, (video_pts, audio_pts), video_object.get_metadata()" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([327, 3, 256, 340]) torch.Size([523264, 1])\n" + ] + } + ], + "source": [ + "vf, af, info, meta = example_read_video(video)\n", + "# total number of frames should be 327 for video and 523264 datapoints for audio\n", + "print(vf.size(), af.size())" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([523264, 1])" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# you can also get the sequence of audio frames as well\n", + "af.size()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Building an example randomly sampled dataset (can be applied to training dataest of kinetics400)\n", + "\n", + "Cool, so now we can use the same principle to make the sample dataset. We suggest trying out iterable dataset for this purpose. \n", + "\n", + "Here, we are going to build\n", + "\n", + "a. an example dataset that reads randomly selected 10 frames of video" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [], + "source": [ + "# make sample dataest\n", + "import os\n", + "os.makedirs(\"./dataset\", exist_ok=True)\n", + "os.makedirs(\"./dataset/1\", exist_ok=True)\n", + "os.makedirs(\"./dataset/2\", exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "18.4%" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading https://github.com/pytorch/vision/blob/master/test/assets/videos/WUzgd7C1pWA.mp4?raw=true to ./dataset/1/WUzgd7C1pWA.mp4\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100.4%" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading https://github.com/pytorch/vision/blob/master/test/assets/videos/RATRACE_wave_f_nm_np1_fr_goo_37.avi?raw=true to ./dataset/1/RATRACE_wave_f_nm_np1_fr_goo_37.avi\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "102.5%" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading https://github.com/pytorch/vision/blob/master/test/assets/videos/SOX5yA1l24A.mp4?raw=true to ./dataset/2/SOX5yA1l24A.mp4\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100.9%" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading https://github.com/pytorch/vision/blob/master/test/assets/videos/v_SoccerJuggling_g23_c01.avi?raw=true to ./dataset/2/v_SoccerJuggling_g23_c01.avi\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "101.5%" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading https://github.com/pytorch/vision/blob/master/test/assets/videos/v_SoccerJuggling_g24_c01.avi?raw=true to ./dataset/2/v_SoccerJuggling_g24_c01.avi\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "101.3%" + ] + } + ], + "source": [ + "# download the videos \n", + "from torchvision.datasets.utils import download_url\n", + "download_url(\"https://github.com/pytorch/vision/blob/master/test/assets/videos/WUzgd7C1pWA.mp4?raw=true\", \"./dataset/1\", \"WUzgd7C1pWA.mp4\")\n", + "download_url(\"https://github.com/pytorch/vision/blob/master/test/assets/videos/RATRACE_wave_f_nm_np1_fr_goo_37.avi?raw=true\", \"./dataset/1\", \"RATRACE_wave_f_nm_np1_fr_goo_37.avi\")\n", + "download_url(\"https://github.com/pytorch/vision/blob/master/test/assets/videos/SOX5yA1l24A.mp4?raw=true\", \"./dataset/2\", \"SOX5yA1l24A.mp4\")\n", + "download_url(\"https://github.com/pytorch/vision/blob/master/test/assets/videos/v_SoccerJuggling_g23_c01.avi?raw=true\", \"./dataset/2\", \"v_SoccerJuggling_g23_c01.avi\")\n", + "download_url(\"https://github.com/pytorch/vision/blob/master/test/assets/videos/v_SoccerJuggling_g24_c01.avi?raw=true\", \"./dataset/2\", \"v_SoccerJuggling_g24_c01.avi\")" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [], + "source": [ + "# housekeeping and utilities\n", + "import os\n", + "import random\n", + "\n", + "import torch\n", + "from torchvision.datasets.folder import make_dataset\n", + "from torchvision import transforms as t\n", + "\n", + "def _find_classes(dir):\n", + " classes = [d.name for d in os.scandir(dir) if d.is_dir()]\n", + " classes.sort()\n", + " class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)}\n", + " return classes, class_to_idx\n", + "\n", + "def get_samples(root, extensions=(\".mp4\", \".avi\")):\n", + " _, class_to_idx = _find_classes(root)\n", + " return make_dataset(root, class_to_idx, extensions=extensions)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We are going to define the dataset and some basic arguments. We asume the structure of the FolderDataset, and add the following parameters:\n", + " \n", + "1. frame transform: with this API, we can chose to apply transforms on every frame of the video\n", + "2. videotransform: equally, we can also apply transform to a 4D tensor\n", + "3. length of the clip: do we want a single or multiple frames?\n", + "\n", + "Note that we actually add `epoch size` as using `IterableDataset` class allows us to naturally oversample clips or images from each video if needed. " + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [], + "source": [ + "class RandomDataset(torch.utils.data.IterableDataset):\n", + " def __init__(self, root, epoch_size=None, frame_transform=None, video_transform=None, clip_len=16):\n", + " super(RandomDataset).__init__()\n", + " \n", + " self.samples = get_samples(root)\n", + " \n", + " # allow for temporal jittering\n", + " if epoch_size is None:\n", + " epoch_size = len(self.samples)\n", + " self.epoch_size = epoch_size\n", + " \n", + " self.clip_len = clip_len # length of a clip in frames\n", + " self.frame_transform = frame_transform # transform for every frame individually\n", + " self.video_transform = video_transform # transform on a video sequence\n", + "\n", + " def __iter__(self):\n", + " for i in range(self.epoch_size):\n", + " # get random sample\n", + " path, target = random.choice(self.samples)\n", + " # get video object\n", + " vid = torchvision.io.VideoReader(path, \"video\")\n", + " metadata = vid.get_metadata()\n", + " video_frames = [] # video frame buffer \n", + " # seek and return frames\n", + " \n", + " max_seek = metadata[\"video\"]['duration'][0] - (self.clip_len / metadata[\"video\"]['fps'][0])\n", + " start = random.uniform(0., max_seek)\n", + " for frame in itertools.islice(vid.seek(start), self.clip_len):\n", + " video_frames.append(self.frame_transform(frame['data']))\n", + " current_pts = frame['pts']\n", + " # stack it into a tensor\n", + " video = torch.stack(video_frames, 0)\n", + " if self.video_transform:\n", + " video = self.video_transform(video)\n", + " output = {\n", + " 'path': path,\n", + " 'video': video,\n", + " 'target': target,\n", + " 'start': start,\n", + " 'end': current_pts}\n", + " yield output" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Given a path of videos in a folder structure, i.e:\n", + "```\n", + "dataset:\n", + " -class 1:\n", + " file 0\n", + " file 1\n", + " ...\n", + " - class 2:\n", + " file 0\n", + " file 1\n", + " ...\n", + " - ...\n", + "```\n", + "We can generate a dataloader and test the dataset. \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [], + "source": [ + "from torchvision import transforms as t\n", + "transforms = [t.Resize((112, 112))]\n", + "frame_transform = t.Compose(transforms)\n", + "\n", + "ds = RandomDataset(\"./dataset\", epoch_size=None, frame_transform=frame_transform)" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [], + "source": [ + "from torch.utils.data import DataLoader\n", + "loader = DataLoader(ds, batch_size=12)\n", + "d = {\"video\":[], 'start':[], 'end':[], 'tensorsize':[]}\n", + "for b in loader:\n", + " for i in range(len(b['path'])):\n", + " d['video'].append(b['path'][i])\n", + " d['start'].append(b['start'][i].item())\n", + " d['end'].append(b['end'][i].item())\n", + " d['tensorsize'].append(b['video'][i].size())" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'video': ['./dataset/2/SOX5yA1l24A.mp4',\n", + " './dataset/1/RATRACE_wave_f_nm_np1_fr_goo_37.avi',\n", + " './dataset/2/v_SoccerJuggling_g23_c01.avi',\n", + " './dataset/2/SOX5yA1l24A.mp4',\n", + " './dataset/2/v_SoccerJuggling_g24_c01.avi'],\n", + " 'start': [2.9344678384893816,\n", + " 1.6827470772443045,\n", + " 3.9380918322335887,\n", + " 8.400625043794742,\n", + " 0.9696198736175933],\n", + " 'end': [3.4367669999999997,\n", + " 2.1999999999999997,\n", + " 4.471133,\n", + " 8.9089,\n", + " 1.5014999999999998],\n", + " 'tensorsize': [torch.Size([16, 3, 112, 112]),\n", + " torch.Size([16, 3, 112, 112]),\n", + " torch.Size([16, 3, 112, 112]),\n", + " torch.Size([16, 3, 112, 112]),\n", + " torch.Size([16, 3, 112, 112])]}" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Visualisation:\n", + " \n", + "example of visualsed video" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pylab as plt\n", + "%matplotlib inline\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "for i in range(16):\n", + " plt.subplot(4, 4, i + 1)\n", + " plt.imshow(b[\"video\"][0, i, ...].permute(1, 2, 0))\n", + " plt.axis(\"off\")" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [], + "source": [ + "## Cleanup\n", + "import os, shutil\n", + "os.remove(\"./WUzgd7C1pWA.mp4\")\n", + "shutil.rmtree(\"./dataset\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5-final" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/examples/python/visualization_utils.ipynb b/examples/python/visualization_utils.ipynb new file mode 100644 index 00000000000..2f042cf02c8 --- /dev/null +++ b/examples/python/visualization_utils.ipynb @@ -0,0 +1,683 @@ +{ + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6-final" + }, + "orig_nbformat": 2, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.7.6 64-bit", + "metadata": { + "interpreter": { + "hash": "b59c5859fdaa326f162dbe4b890c245edf044b3a52376874fe660daf6e3b88fe" + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 2, + "cells": [ + { + "source": [ + "# Torchvision Utilites for Visualization" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "`torchvision` provides utilites for visualizing images, bounding boxes and segmentation masks.\n", + "\n", + "All the utilities do not perform inplace modification of inputs.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torchvision.transforms as transforms\n", + "import torchvision.datasets as datasets\n", + "import numpy as np\n", + "import random\n", + "import scipy.misc" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "%matplotlib inline\n", + "def show(img):\n", + " npimg = img.numpy()\n", + " plt.imshow(np.transpose(npimg, (1,2,0)), interpolation='nearest')" + ] + }, + { + "source": [ + "## Visualize Grid of Images" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "Use `torchvision.utils.make_grid()` to create a grid of images.\n", + "\n", + "You can also pad, mormalize and scale the images on the fly.\n", + "\n", + "This utility can take 4D mini-batch Tensor of shape (B x C x H x W) or a list of images all of the same size." + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "from torchvision.utils import make_grid" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "torch.Size([3, 768, 1024])\n", + "/home/oke/Aditya/PyTorch/vision/torchvision/transforms/functional.py:114: UserWarning: The given NumPy array is not writeable, and PyTorch does not support non-writeable tensors. This means you can write to the underlying (supposedly non-writeable) NumPy array using the tensor. You may want to copy the array to protect its data or make it writeable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at /opt/conda/conda-bld/pytorch_1614931498178/work/torch/csrc/utils/tensor_numpy.cpp:179.)\n", + " img = torch.from_numpy(pic.transpose((2, 0, 1))).contiguous()\n" + ] + } + ], + "source": [ + "lena = scipy.misc.face()\n", + "img = transforms.ToTensor()(lena)\n", + "print(img.size())" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "imglist = [img, img, img, img.clone().fill_(-10)]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n 2021-03-24T23:32:48.421838\n image/svg+xml\n \n \n Matplotlib v3.3.4, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAABpCAYAAADBa2OhAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAACky0lEQVR4nOz9d3Rj933n/z8vgIveQRAECPZehzOc3vuoWN2yJUuWbbnGdrLJxo7jxJs4ieM4Tuy417gpKpasYnXNaHovnMYZ9t5AAETv/f7+kDcnu7+d2axX/kabw8c5PODlJXHveYF488PP/dzPR5AkiWXLli1b9p+L7D/6BJYtW7Zs2dtvubgvW7Zs2X9Cy8V92bJly/4TWi7uy5YtW/af0HJxX7Zs2bL/hJaL+7Jly5b9J/Q7Ke6CINwiCMKIIAjjgiD86e/iGMuWLVu27MaEt3ucuyAIcmAU2APMAxeAByVJGnxbD7Rs2bJly27od9FyXwuMS5I0KUlSDvglcNfv4DjLli1btuwGFL+D56wE5v7N9jyw7n/+JkEQPgZ87Debvb+D81i2bNmy/+wCkiTZ/1c7fhfF/d9FkqQfAT8CEARheQ6EZcuWLfs/N3OjHb+LbpkFoOrfbLt/87Vly5YtW/b/kd9Fcb8ANAmCUCcIghJ4AHjpd3CcZcuWLVt2A297t4wkSQVBED4N7AfkwE8lSRp4u4+zbNmyZctu7G0fCvlbncRyn/uyZcuW/TYuSpK0+n+1Y/kO1WXLli37T+g/bLTMv9cff/ovsFqMeGfnqW1vpcJRTX//FUYmB2kvK8NW7eLY6T7Wr9uExWKhsaWZvlOnmPQlUIp5Wt1uQukE7uoqxieGCC36KZbyzM/M8+BDH2do9ByhYBShWCCRylDldJHMJYlEIhiMdvLpGCt62pHJdcikHNMTQ/T29pIVBIxqPeNjkyRTUVpbOvHMz5PMJ9GqzMj1BlIhH75whHw8RVoAZbaI0WZkbGqRTEEgL5VIJEPEYjFUKhWiDpTWJcorJQSzlsYWN0/+0/Nkgga++q0/5ev/9D1QSwi5AumkDJlOQWN7Pc32SrIJH9/56uEb5vjnf/wVlEoI+5Zo6OpAQI3fv0gg7MUmSNgqXbx57BS7d+3DbLNSU1PD1fPnOXVxhLaOWqxKJTOLCzS1t7LomeHSuQu4KivwzHl46IOfYmjoDIsePxazkZHhMapdleTJU1tbSyicwqBR0NBYTSicxllmZnpqGLvdjr6sjEIqw+jIJLYyAxWOaqYmJpjxz1HjbEBbZifm8xDLpIj6guRFBVIshdqgxhdMEk3mKCCRTIWJx+Notdq3crT5sdjzqJ1mausdPPn1X5MJGvjiP3yK73/3MeQ6GVImRy4jIteL1DbVsqa2hfn56/zoG6dumONffu4fKRRSZON5GrubWVqKk88niSdC6PN5HDW1nLp4mbW967Day7Db7Vw8fZpJT4TVa7vQlHKcvXiZmsZ6wiEf1y9epr6pnumJaR7+wEeYmRtibtaLTJC4fm0Qm8mM2W5m9dq1zM/5cZVbSGdiFNFQX1lOPLbE4OgYDS3NpCIR5DINaq2AQqZjYW6ORX+QmpoaVBYrhXiEvFTCO7tARiYglkrkc1mSWYFYKk8ymyGZCpNIJFCbDeiUEooyLwZzHmOtlaqacp7+5quE/BY+8oktHHj9LAqDglI6TTGvRm1V4aiysadjK0OjZ/jB10/8LkrCsn+nd3zL/fq1S0wPTlDT2EwqnmDg0iWy2SwajYakRo3BUklgfo5ssYDFYScUClHX2oRdr6OjoZn+4UHkoo6wP8DC6CTBaILu7m56V63j4uWTLPkjyDUadt52J7v37ODsuRNEIhEcDgcnDx+gs7OTiYkJrly5wvlzJ2hsbESpVJKMxpjzBvAG/Gg1ZmKxGP5YHKveit/vp+/ceVLxHHKZilC+SDQUp6DWoFTIqa9xkU7HKeVjKBQKSgoZMrWSdDhL3C+R8JkxmdU8/c3nSU47+eyffZJvfv0HiFoF0aU0kXCC+jYnK9e3YqvUc903QKhw8xzPnz9JcilGJJkmFo4wOzmOSqUhlUoRAmQqE4H5Oabn5zBazJRKJRo7m1nV2Y5YhNHpSQRRi06pxj8zj8FSRnd3N81N7Vy9dpax0WkCsRgbt+xi85YN+JcWMBqNBAIBxgauolarGRoaYmFuhsPH9uN2uxEEgbGhYYbGZwlGw8zNBhAEAV80xprOXkKhEIf2HyAaSZDPlYhJAj7PEgqzhTKrhXKrGUnK/w85SqKcdDhL1FciFTCiN4i8+N39JKYc/N0//jk/+MbPUOmVRPwpopE0zV1VtHRXY6/Wc3rqPGFJvGmOb7z+MuqSiCceIhIM4ffMY7NWkE6niSkUzHmWCM55GB4fQ63TIkkSa7aso625ieEr10hmc8iVBjpb2oj5AlTWNbJu3Tqq3HXMzk1w9cowUwsLrFm/lbXrenG6ymhsbGRmbJRiOk4wGCSdTqNXi5y7dAqj0YhaLhLw+fGFUpztO8/Q9TmcTieCRsvq1asJh8OcOHKUyalZAkthkjIFkWAMpcWFw2ZGr1ZSKGT+xxyLSdKxPPElKKSMaPVyXv3eUaKTNv7qL+7l0MsnEI0iYU+YWCRL66oaKqqtiMY8r189iCdTfPuKwLLfyju+z/3l548zNjaBUafn4PE3SEUTlNlt1NXUUsynmR6fZGXvak6cPEn3qk48i0H23PYugokE6USctqo6FuenOHP2AvZyM7Fslh2bt/La6y9SyAtcH7hCR3Mnx04cYkXXChpbOwhHltBqtVgNFnxLXuxlJor5ErlCHkd5GXK5HKVSSSQax+8LY7JZSUQTuFsaiPgDOG128hIsBYIYNXLGJ2bIZFMk01msZTaCSwG0aiVL8TiqUh65KGMxkCJeyqKWxclrZYilFKF4mN/7vbvQWlV86/uPIRNU9K7uxhdcRKexUpBlSBVyeAYThBaX8A/Hbpjxi88eZWRkjCpXJWcvnSQTT5Ev5LCYjAhSkaFrA9x197sZHR9BkhfRaM2sWreBZCHHhbPn2LFmA0H/AnPzHrQ6Jf5olC3rN/LyK88TDiWYX5imzGQnEPLS1tJGMlugWMpSXl5OMVNAEkrU11VTKhQJhkMY9FoMBgPV1dV4fUtcvjJAU0sLAV+AmtYmlDI5SmTEUmmGR0ZpbajCtxTA6w9QzGfRG80Eg0H0WjWBVBIhmQR5iaVghlAhhVGdpWSUI8slicRjPPzINswVJn7yixfI5Yqs6u0iFAugVVvIkSJdzDN3LYp/3kN0MnfDHJ/8xatMTEzhcjsYHLqGTqlhfmEOuQB6rZoTR46xZedGaqobuXDxMo3NLfSs2cD14QH6+vp4//13MD8xT75QYmR8BLPVQkdbK0eOHsTn85PP50lGUlRWOSgvq2B4fAKlSkZrYz2zM4uUV9hpaqxDkiSCwSByGagUIt3d3Xj8QU6cPMOKlSvxeXwYym101jeRSCSY9SwQCEZorqskm88xPDpNNJLAVmYkHomiVCmIFIrkg0GypSyRdIFwKoFFlweriJB+K8d97+qksbedX/z4GTLpIl0rmomno2hUZrJSEllRzvUri3hn58jM/9/VhWX/Ljfsc3/Hd8vU19dz8dIFXnn+STbt3k3fiRMsLiQx6+RUuerQGU0YTSbmFmeYf2mUe+59kLHhIU6cOkm908nxNw+wdfN6quorkWRyZs+d4qRMQK0xY6owMTY9idqgoqOjE1uFHb3RAICUS6EQBWqrKulZvZWKOjdCPs/FK5dJBP24XG6mp0/T2NqGSiFCtUiuUIC8xIzPT2tdPadOnWLX9m3su6uDxFKAi5fO4/P60epNRAMeVDI5FrudoH8JjVZBKhgmlEqgymixVGnwBGb5yU/2U1YhIhQVJOOwFIwgk2tJp9NIJSWBQS/xUBatTAfcuLhXVlZy7PgRDr/xa6pbWpidGKPS7SSXDtHY0EE4FCVfzHF14DK5dIR3v/sRBvqvcuTYUartDn4x8hN2796J3VVGKBpjZOAqQqmI01WHqAygMeoRijnKK2woVCJrelah1+q4cPo4zkoXOo2aO+55iJIoIEoS5y/2EQ/4UCr1LPlHWLVyNXq9HoutHJlCQTwQIqfXIcrkKOQyDEYrok5HW0szV/svEY3E0RtNLM5NUCyWqGmsZ3F2Ho1BiSYcJRxKoisaqag2MO+f5pe/PI3dqSSdyCDDQCAURSaqSSaTFPJylkaCpMI5LEobURZvmKMokzM3P8XoxACLi4soS3kcDjsloUBzczfhUBSHs5JjJ4+y5J1ly6YNjI8McvXCRZrdlbz2wiusWr8Wh81GOJPk8vmTqEQF7R2rSGROY7M6Cfrn0Wu05Mlz9733YLOYOXvqMCtXdSPKFey7/X5KokApk6F/oJ98PEQmDzPTs7S3dVJuK0cSFDicTpb8IczOctRLQUxGAxqtkcnRQVZ0tDI5MUo6nUZrMuBbmCGdzlDd1IBnaga9XkkuEyUWzWKQaSirNuINzXPowBB9V8aIheOY9S4i8QQyhYpkMkkyUSI4HiEfz+I0uJjC83aXg2X/B97xLfc//eO/4+KZY3R2dpIIeRidnMFoMhOKhFm7bgWHDx6huqqFcxdOc8fddzI3v4hEkc6u1Rh0KqzWcp575kW6epporG/A4/HibnDz7BPPsn37drzzYxitTrxeL9lUlN5VaxmbmsZu1rNhw2a8Xi9l1VWYLGYW52Zpa11BrlBgZmKc555+ErlKjd5iIRFPoVFKJOJZFBoVWrWFxjoXGoOJ2dl5OtoaMZvN/OqJfyGRyZLLpBDEAiatFdQqotEooSUPmUKRdCaHpJCh0csQbXGSySSV7hoMFSoUohq5XEbUs8TkYJB8Mo9cVFPVYOLkyzcecfrZP/wSV86fZMWKFWRifvoHR1BrtCRSCbq627hy6TK9qzZz7sJp2rs68C+FWAr42LHzVmKRAA0NLRzcf5SG5krWr11HJBJDUAs8/pPHueuuu7h84TjW8io0WhUhv5+O9k7O9l1ky/rVKJUa8vk8Vncl5eXlxMIRWtq6SMRizE5P8dzTTyJqtJQ5nfi8S1hNaqKRFGqDDoVCj0ZRwmwpQ1CIrOhqJZPJ8MwTv6CIHEoFMoU4ZSYHGSQSiQRB/wIFBLK5AkUZ6EwCcnOCRCJBS2sHollCkInI5BL+SQ8LEwmK6SJyUU1rdwVvPHXhhjl++uN/ztDVC6xfu5JcIsz5K9fQ6U3EkhEcDjvhYIjmlk68Xi+iqCAYjRMJB7nvPQ9z8cIZVnSuw+v1kkgH2LdnL2azlePnjnPi4Ak2rl/LxNh15CojjU31eOfnqaut50r/Vfbs3IXH48FisWCpdGE2v9V11tTYxuLcLFNTUxx841UUah1mu41YOIZepyCSyGCxWBAEDelYgHyxxIqVq1nR1UokFOb5p58gXyqRz+eJp0M4rC4imTSirMjCwgK5kkQ2VwBRjs4kIRgTpCI5OnvbEfRFEBTIhAJzI/P4ppMU8wUUch0r19fy65+e/L8rDMv+Pf7fbbm/8sor/NEnPszFS1fp3ryDYOxVUskYVouZbEaid81q6pvbaW2rYXB0kn179hJOxGisa+bSxavU19ppanQRi4Y5fPAAm7fvQEqm6e7upESRubkg1XI19dVVWFzraaisYN+tt6EUZQyPjKIQRY4feJ1wIkJ3+0oMehPV1dUU82FUahnzXg8tdjuFbB6lyUEu6sVuqiAUijDnCRILT1FeVcHJcxcpFvOkkGN3ubAaDCjVGtw1LsYn5ijmxlA43ATDYUpSHIUoQ65OkwlKVHc0obaLqDUGZEKBoYuDRKYLCJKEXq9l4971KDTiTYv7yZMneeT+u/H6AqzduoHZxX8mm0nirqykVJTR0dWJ3W1nVamLfEnkgXffx9j8HM4yF9lyFzJBQWODk0DAz5FDb9LevQJVEdraWkik4ogKC4VcBq3JiK6plZWreti1Zy9KUUYwFCYYDPKLn/8EvcXExg1b0Ov11NbW4vfkKLObGJtcQGk0UMjm0ZvqmF8cxuEqRyZTUCzmiYRT6KwGXj94lGIxj9JUhlKU01RbiyQoEBRFvL4wvoVZtNUNhKNRQpEwalkRhZgnEypS096MzCIgagwo5SWuX7hOeLoApRJGo5aNezciVyt46z68/7Vr167xrn37UItanFs2c210GlEhUF/TiFyUYzQaKQoF5IoSjsoq9u3rYC6wRD6b5T33v585zyySJ0EmnaTv/FmsZeWUafXYLCbCoSXq67qZmh0ll0yjt1ewqncle/bdQqlUoL2rk3Q6zTe/8TVUei237LsdjVJFldvJ0uI4K3o6OXjkJEqjjmQihdVSQyHnR6u24HZXMzYm4dKbSWVzPP/y6yQzaUoqPSpRRrPLhdFsIxjzo16KEfDN0dzSyaLPRzgaQSplkStKpENFqlZUUzJIKFV6FEKOkYszBGbSUITyeitrVq/7TY7Lxf0/0jv+gurenTtwVNfQ1NaKb2KRTdt2o9OoqHJWoFeqWZhcQKsQiUQzqFQmAn4/xVSGf/7nH9HQ5MJoVrFhy1YmJ2ZZ2buOZDKJo6aWdDxGxO+nY2UX/kSSqqoqQosLqJVqLl6+SkmuoqmhGXddDeUOEz6fjzcOvsq5c0cYG7vKylWd9K5eiUGr5UrfBebmFqivr6XcXoFcLmIwmlBqdBhMViYn5sgk8hTycvRKM3JBTypdQKO18srLBxA1OqwuNyaNGZvZhsvmRKXUoJKbKGsow2TUoREMqASB+Ys+QmNZ0vksG25bzY57t1LudqM32G6a465tW5nz+smUClw41seOPbdhMuhoqqvFojOwOL1IPBhCpzVTU9fK6Og4UirLCy88RzA8j94oYrM7CIfitHf2/GuOWqXI9Ogoa7esxVZZRUdHB/JCDqkoMTQyhkpvxqg1YrJa6OhsZGnJxyuvvsDx42/Q33+WxqYajCY9CpnE2MAAoVAEtVqJVBKYm1sgEo1RkAQkQcn8nI9MIg+SCrPaRi4jZ3BokkQyz8W+a6h0Bgx2BzZDGWpRTWN1LVqdCVHSYG9wYDEbUMsMqAQ5Z166jG80TSqXYsud69l53w7slZXo9Nab5viuPXs4c/4ik1NjjF4aZUXPamRAZ0c7Jo0Oz9QisWCW3lXrWbd+JyMjY4QWvFy/3s9rbzyHSWugKAlUV9VjszvJ5NMY7OXU1VQTy+VZuamXvXfcwa5duyg36sln8wyNjGGx2cmn86h1Wlpa64hGw7z40rO8+eaLjAxeoaW9gbGJUaRijomhIerrmikJJZLJNNPTs8wveChJMsLxNDPTHpLRDFJOSYXZRToJkWiGyakFFmb9aI1mtHozRrUJoSTQWF2L3mBBXlRR3liBw+pALTOgRMbhp8+zMBInR4kd925m0/Zd2JxONFrz21cElv1W3vEt93vvfYQLF87RuXIls/YZDr/xOmvX7aDv3EnKV1RiNZdTTOWoq65i3+0r+Zd/+RkmrZIVXZ1Uuhv41QvPo9MqWb12Hc2dnTzz+OP8/OeP8fnPfobGzmZEuZ39rz5NOlPAYrHg8weQyWQMXLqAWqvilVde4Z57bmV4eBjPvJ+p8SmsJisuRwW9q7p4/fXXcVgrcJbbeePV13j00UeZXYqQXgoz71ukIEEuncKkc1Aijd5kwu6qQKVSMTM1TWWFG9/CPMlUBLPFxuTkJKvXrSWeiWE1m6lrbGfTth0oFAqikTh/c+ELdG4woDaBwWRAo7FRLGWRy28+XKa3dwuvv/46H/zY+zh29DCzs9NUupsZuDZIVV0tRr2VcpONUCJGW0sj//KLn2Iz67hl7z5qGho5fvokJr2G5vZO6ltbOfjaa/z854/x6CPv56GPfIh8Tkvh7EECkTiWMhtj45NojSZOHzmI0Wzg4MGD3HrbTmZmZggH44wOjeIoc9BQV8P2bRu5dOkSNa5GKuw2Zian2LFtEwuhJOpCiTmvB5lcJBIKUG6vQCbkUao1dDfUkS8WSMTiuCtq8M7PUSimUSm0FNJ5mlasxBYNoBJF1m7YRs/qtchkMgYHhrn22l/TvEVAa5RhMBlQKs0UihkU8tJNc1SozKRSMm5/4CEe/8VP2bJ2DbNTkxw7dBhrRTkmo42WBhdezyL1zR1cvXoZq8XInt23o9JomV+cpaOnk0VPgMb2dvrOnuG73/o+G9b08JnPfIG5WS+DV68hFEBvMrIUCKHS6Tl+5AA2m43Bs4P09vYyvzBDOlFkoH+A+upmautruPfuW/nud3+C3V6HrJDCoNPT3d5GvATpUJSCHJLJKEgSOq0BUaVGosDatWsJRcLIBRlKuRLf/DySANFcFpulDHddDRqDHrfLxbqN22nrWkEul+Pll15lpCxKeXsJnUGNwWRAoRHIF9KI7/jK8p/fO77P/aXnzyEq4Gff/jp1zfUotToqXW5Grl+hvqmZibEhJiaHuf32ezh+/CTtPd2cOHGGTNTP6t4tFJHoWr+al554hsamajweD93rtlLjcNA/2s/ivI/O9haeeeZZPvv5P0PKRGjtWEUhHWfB42Mp4MFoqeDAa6+Sy8VYCszyvve9D1tZPXOeYc6e6gO0VNfW03fxCna7nYbaNmz2MrxLfvoHBqmudFHurCASTuGdm0Cl1xKJRGhtbsE778PqUnH+/BmMujIy0SgWs5n7H3mI9tZG1DoLAyNjeL1eDAYt9ko3X//5H1LusGA0lqPWKAgsznHl8jUuvj5xw4zPn57h2NGDeEYHKAhFWjo6SacyXLt0HoutjGIhg88/T1fnKgpFiWQuRSYrMXy1j54V6wiEQ6zftY0rx0+h1orEYjGaV6xFns8RyUQZHRqnd2U3A/0DPPToo2SiPtw1zSiFItMz80zPjNHW2cuTj/2CTCaCSl3itttuw2iq4tjJ1/AuBCiV1PSuWcehw8eora2lp2s9Y9PjiKLI0PAojvIytAY9GrWFhelh4qkkHu8i3Z1d6NQatBYZJ04chaISsVTCUV7Otlv2sKZ3BSqtmaGxCcLhMHK5gLOmim88/seU260YjeUoVTI8M2MMXB/l8v6pG+Z4pW+Rr/7932IzqNCpldQ1NrDoCzA6cA0As1FDJLBEuctNZ89K5rzzOCqqee7Jx6iubsKfjtHb24u+JBCKLJHNZimvaSUTCeLxhZCEHE0NtZgNZlpXrMA7M4arqh6zXsfc3ByXr5xn87bd/OSHPyKVCtHcUsWGzduRoeXQ0ZcI+uIIgo61G9Zz5ep1KlzVNNW2cGXgKlqtlqHhYVxOJ+lclo3rt3D88AHiqSSFUhFRrmBldxcFIcn5C5dJJBIoAbkEq7ds5L577kCtszAxM0cgEKBYzFNRU8kPnv8Cdstbv48KhYLZyWuMjkzdNMdlb5v/d/vcX331V5TpNZRUekZGZlAp9cSCYawWK5KgYGXvOurc1Rw/exadVo3Naqau2o5nrkg8HWL37Xdy+MDrrFjdQyDgJxiLoFWKvH5kPxRL1NfX46qs4PN/8WVkxTQJSSBXyCNTadEbNExOhjh5+gSPPPwhDh56DbPJSjpVIhH3UWEpw6SVcfrcWbQWA/VNzYQWQpRVViGXy5HLgzRUVpPKpzl/8jQtzR24K5wkMmk6N2zl3JlT3Pfgezl74QzKooAoyEjmU7zngY/S1dHI9aFxLl8dJBiLkIpFGJ6c5RMf+wgVTicGvRpRBcePHCafyiFJ8pvm+NTjP0YlKzHtC1PjKieTkhHyLVJd04DOoKfcbkPKdvHc/tfpbGqioaWRC2dOUl1Tj9Yosn3dLoauXKKupYlYPMSFK5foWbOJs5fPEwiH2LltOzVVbuqae8gkkgQjSTTGEJWVlWi0SkrFLL/4+Q946MGHOXJ0P9FQAp83QlVVHTs2bOTZZ5/Bs+RnbNqG2VZGMS2gs1ioEVrwLc7QWt9IMpdifnIadyUYtRrUajU2azlqUaRjZQ/Tc1Mo8hI2q5Xg0jwf/fijlDsdTM56uHTlMLOLC6TjUYYnZ/ncn36G8goXJp0amaLIqeMnyCWzCKhumuNzz/yCCqsGoaTFZDbR3LqGyYkncVfVotWpMBqNSCUZx84epTJYQymf59TR19iwcSt2hxW5aGFhbpCMzoxcreTEsSN8oKWL09cuEw362bpnH85KN0qNmaDPSyyZQVr0YKivRyFKaDUiP/nxD7jn3js5cvRN5ueCxCMRamvN7N26jV//+nmCsQTeoI9oNM7ajXUYy6zs3H0bwaUFipkCRpsJn8fD0PUxRAHsVhulosCOnduIZZLkC2liiwdwuSvJZaJ8/s/+HFOZhUAkwalzRxgYvE40GsYbivHHn/k0ZbZyjDotElkOvfkGsqIMJOXbWQaW/Rbe8X3uc1MTaLRa6urq+f0/+BNqO6rYtHkXSm0ZfWfO4JtbIJpOImWLbNt7G4def411qzeBJKFQqJgYnaClvYdyVyXpYhGZXMVLL/ySDes3YbGX09rexcTEAkadDJPFzLr1mwkv+ckmIix6Jrl0/hzX+wf466/8HTZDOVabnpdffhVRlHPkyHHsrno++OGPoVXpCS55qO9upLKykkw+R1vHCtKFDKKooKqmjnQuTTydQqXTkS9k2bRuDZPjQ9Q6rPT0rsRmFvnU7z9KpbuMQDDCvHeO5198iQOvvsC1q300V1ei1mRRKUROHDnCm68fIZMuUd3WwM47tt00x8CSF4WoQiaT0dK2hlDCS1NLD6YyFyODQ0wMj5Iu5mmtb2bzjr3sf/ll9uy+jbm5GTzzXjxzHooKJTqTmUAshkqp46UXfsmuPfuob2iia8UqrvYPUWZSYbZa2Lx1GxpRQTISYMk/y9mTJwj4l/j5k0/gsldiLzdx4sQpvN55Ll2+ztrNu9m0dRsySYZBpyaYDWM2G6ltqKe5tZOirEQmk8FsLUMmyijJ5DS2tuKqrMBuMTE+NohJKdC7djVVVRY++vH3kStlWfT5GJsa5qlnfsVLzz3F9f6L1DnLGR09i0YucvTgIQ7tP0YqWaC+q4Xdd908x1g8jCiqGBwfIZKQePPoK9RUtbN6w1aK+RJzk9NYzCZ6O3roXr2WSxf62Lf3XZw9e5qBa9eZW5xm1udHqzeQFwS0GiNjw1fZe8ttGBwVrF2zgb7zF7EZ1RjNFjZu3kJDTTX5Upol/zynjx9jye/l1TcPUl/ThN1u4dTRE8zMTDA372PzztuorWtgYnSCcqedQ6+/jiAISHIZnqUQ1Q01b91wlUijUckwl9lZu2kDBouBsyePcvzEYWJLXjq6GunpaeSDj95PJBVhdn6ecxfP8OOf/owDr7/CyFA/BlHGhUuH0Ikqjh48xNE3T1OSoGVVJ3vu3v42VoFlv413fMv9449+lGgqi15U8dIrT9HS0kYilaKYi3H73Xdjszl4+fknaW1uYWZ8ipqKKva/+jLRaIQ/fPgDnDp1ivIyKzPzM1iMWk6P9lNT08D5g4dxVLoI+T2cPnGYKreT1vYOJiamUCjUXL54GVelhawkoTdY0StFaurKiUdAUeogEEihVOvJZCAelaipbqSmvoPRkSmCoSXivkXioSVsVjvuKhdBr5+atk4CHg++JS/FUom0DMaGBhDlBSKRCOl0CEuZjdHRYTR6A9PDl7AYVSRLejQaE3qdnSd/9TizgTEEmRxnjZ2KWhdueyMy4eYtzh2bt6Ez21i7Yh2esB+NQo3FXkYo6GHnnt0UJQWH979IW1sHVy9doaW2iZeee450Ksneu+8llUphCOsYHb5OZ2sLF08cwWZzcP7gYfJSCc/sJNev9lFXW0Vn9wqGhkYoFYr45qeodJeRzOcpFGU019ZgLzdSZlXgsFgAPaJKR9+5fixWJ2aTHZXOSKfFQQGJi0cPshQJU+5wolAoKLfYUFvt5JNJovEIjU1N9F04i1DKMrU0y+joKHq9nHUbe1lcnEOhUDB06QwmvYg8b0enNdNQ38lrB58jlg2BIMfdUEF5dQVuexMy2c3fEplokq2338nG7bvxzM5w9epVtm7ZzfTIVfKFArWtXVy83E9NlYu+cxdwWO08+9QTWC1GXPUttLe2oNNoOHP2GHv27OGqskgwsIR4bYDFqVm889MUcikuX73C+vUbuXKln0QsSjGToKraQSybhaKWXVs2k4wt0dlSy8xiAkEwkM2nGDx/jWAkjd5oIxLNcP8D7yeSSjBy6iQj42Ps2XsL0WiUhppqWnrWMjU6RCyaZNv27Tz11C+odNiZn50gl0uxFFgkmalkKTKJVqulv+8ERp2CQkKNTmtmw/rtvHbsZ0QzERDk1DQ7cVabcNg6eWsp5WX/kd7xLXdfJEN7Tw8L4RBlZgtHT52gWMpSVl7O1bMXWVpaIhBI0dTSCTIF9S3d6M1W6mobOfbmATKRHFKxhNtVwczUNH/0J39BOBxmMbDAxPQUCgTkcqhtauD64CDOChdarZaauioEGSiQMOnVfOiDjwAlKsscuKvriEYyOCuq6e5Zy5PPP4/WYCXo8dPe3s7gtREKMjkGq42dO3fSf+k8CwEvw9ev4pmbJZWMMHCljzfffBOlWkVBrkCtUbL39lsYGhjE7w9w6vgxnnrqFXKpJNX1TWzbtoM979pOMOzBWK6huquaqhY3blstcmQUiv6b5qi3VWKy2znSd56ha/1Mzs2w6JlFpdHQf+ka0WiUQkFJU0snjc2tyFRGtCYL69ZuJBEKMje2gF5rYMf2rVy+eInPfeFLhMNh5rwzDI+NEvT5EUUZK1av4uq1a9TV1uN0OqmqqaQk5VHJFFhNOlb3dJFMxik3W6iur2du1o+Aki079jLp8WC2VTA3Pk06nWbg0gAzi15aOzppamgkEQnQd+0yIf8i169eIZWM8OqvnyWdy9N//Rp5mZzqGjdbd20nGo5w8fxFTh47zv4DJ8kk4tQ3t7Ft+05WbminUEyjd2ioX1mPq9GJy1KNHIFiIXzTHO998EPMer28eegAfs8CRpuF/a++xOTEDNl0kWg0jCCI1NQ1sn7zFlR6K87qerq7VtJUVwNyNVIJ3n3vfbzx2ut89BN/xNzcHBMzo7gcVhbn5hkbHWDbzh30X79OR0cHPT2rqKgpJ56IIEoCBr2IXi0SXApgN5nZuGELfl+UZLxAz9qNdPWuQasxIS/AtSsDzAxPE4rFWbdxE/lsDpVcYmB8mP7L55idmkSjlvPMU4+xbvM2Dh89gtJgJCdJ2CpsxKMxjh46ytFDhzl7tp9EJExbVzc7d+2hsqmCfCGNoUJLY28jFfUOyk1tyBGQStG3sQos+2284y+o3nLLB3norl2MjIzR13ed3//jz5GIBRkZ7EcoKdh7732MXLuCrCQxMDyAFoGSWkM6HiaXz7B24y4KZPjZ977Jx37vk/zk5z+hp6OL1es3cObMOWYnx7jjvvuodLpIJ9I4ysuRa0QUuRwDA30UZRooQbnTSC4WxVxeT6kEL7z4HI117RREA6HwEmeOn+Q9D72XJV+Cjq5OJsaHKebiDA+NohBlCMiZmpwnm4qzftN6SrksZ6/20dLcwdTsGLJSklW9nSx5fRj1BoYmxsgmclQ1tNHavpK163p55tUfMhsbwGCzYTHaMem0SKLIa8++QCElMXbhxncEbt38bu6/fRvXrg1QVdXC1j27Cfq9HD/yJnqdmXsefJhzp47hdlUxNj6Ay2zj4vVhjAYlWp2atq61RJNBnnvi52zdvoOXX3uZVV0r6FjRw6lTZ5iZGOWhD3yQarcbfyBEldOF2qhFSsUZGryC3lLB9OQ0vWs7yEYjGGw1pFIZXnntJdzOBtxNnfj8Hp567HEeeP+DeD1Rmpub0eqUnDx2AJfTzeDQdZSimkg4SSYR57Y793H8yFHmA4t0dq1kZnacxblR9uzdSiqeJBqOMDI1hUahpra5k6aWbjZv2cAPn/wyoZIHvdmC2WDFYtCDUsnzTz6NLCNj7OKN71DdtunddDVVUJIU3HnHu/FFQtjMJn75xC9obethy67djA1ewWZ1EI37IZljdGYBk1GF0+WgvKoBz+Isw1cuIBcVXLxymXWrVuOodDMxNcvItcvc/+D7aG5oZnxqnOb6JjQmHVIiwrXrl7FXNjB09Rpb924gH42jMTkplIo8++yvsBgr6Fq7hWQqxlf/9u943wcewjO1SENzO0aLhqOH3mDL5m28sf81NGodmUQcUaHl1jv28qtfPo0n5KO2roF0JkEyOk9XdyvFIkyOTOAPBVEpVDR1rKSzfR2967v5xk//nKQijN5oxmS0YjUayIkCLz7+KxQ5xU1zXPa2ueEF1Xd8cf/EBz9FYDbM3nveR0tXN2++8Cvae5uxiXpGZhcpFbMEQ142rt+FN7LITP8A3Wu6CYSDDPZdY8Ot2xElgbPnzyClMySkImJRxsTEBAsLC7Q019LY2Mz0whKPPPIIjQ3VRCNe4pEg5AUc7loUSpHFmRG8S2H8i0s0tnXiqGnmxIFDGGx2+i9cpH9sEKtWB3IlNdWVJHMpPBPT7Hv3XQxcvohCEslIRWQyGZs2b+M73/42G9et49LAFXLFNG5HGbHAPJu2buPUyfP0Xx2k0u1g7ZZt3HbLLn7wy+9QUEcoyUsYjDbMejlHfn0cqaQgVwRlUcHE0I1XM/z8H32B8f4perbsYduuPRx/7SW27FpDLpYlni0yNjrEgmeGbVtuIVlMkFsKYXPZ8C758E15aFnXjbJY4tylPgqpDPFCFpWk4OjRowB0dTZjNlpI5OGDH3yEmupKQsEFUvEIIV+YzpVrKJSKJEIexqfmWJz30tDagb26Cf/0HHO+JWKhMIdPHqWjoZF4KotWoyQnFBi/OsBDH/8o504ewaK3YnNVkMnkaO/o5Dvf/jbNzU3IRAVXr52ns62ZyeErrNu4iYHBMSZGp7HbTKzauJk9e7bx3Se+SUkboyRK6HUmDGoFx1858a856mQahvunb5jj1/72G5w+coHeLXtZs2ET186dorndjTynwOyo4OqVi8wsemmpb0WpB11BoCiXmJgcRcqDs6ERkSTT83OE/SEShSxmtYEf/vCHVDjL6O7sQKfRY3a4ufvuO3E5y/F5p9EpZFzsu8qWnXspFAqkE0FGxyeZGp/G7m7EVF5BmdbAtMdLPBxhdHocUQKlvIioNiGoZYz3X+Puhx7mlReewaK30rpiBQsLC7gq3RzYv5/ychvbd+3hu9//Ght62jh9+jhtnV3MznmJhhLYLUa61q1nz+6d/ONP/gb0KVAJ6LUGVDKJMwfOgWAkm8ugE1UMX75xjsveNv/vjpbZs/0+hmcm0agEZDKJ6honJw8doK19DYIgkohnsFkr3prNLpvHUG1Eq9FjEUV0ZfNcPn0Og70ClUxBZXc7Tz/2GO//8CfQmI0oDTp23HoLolxk5+0tmPUaMpkMhUKByspK1Co98VwRtVqNvcLNgjdMa1cPp85dpHw6SKYkw6JQcMc9d2O/6ESukLEUiiJkoszOzvLA+x7gjSNvIMvLuPO227k8eJ1AIMBLL73GXfe8l9GRAfweP9t3bODYoQO89/33k0vlUKpVfPJTH0Wr0mAwG/niV/4KwR5Dhwa5As68cYRCXIFcKSIqtORLQd73B3fw95984oY5runZhUJ5nZaGakqlPC2t9fz4+9/FXdWOQhShJFBT3fDWRUujnoNzx1itX4fN4QRJzoXjpzA5KnA7nGTIc+XFF3nfBz9KlhKxWIy65mYaauuwWCvQaDRks1ny+TxOp5PGhlbiuSJGvRGhVECSFmhfsYrzl/ppzamYnl9AFOV0r1iBQqvEaDIwMDqOupjm+tVBHvrA+4lE/ZhNZTTVN3B1eJB0OsuCZ4k77rqPvgvnCCx5cFe4OHfyNO+69zZkRYFsOsN7H7gXhUKkrMzGn/zF51BXZzDKDRTTOfqPXUVKKhCUSkSFhiIhbvnYVob/YPqGOVY7O1Hsc1DfWEc8HqHSXc4vfvITyh31uKuqMBst1NfXEwwGaXfU8quXn2TLqu1YyyvQqjV45icxlTtoqKphSlQwdeoUmsY23nXfPYQWp7BW1dHd0YnJXI4kSeTzefL5PAq9kX2330Y8V8SgMSCXioiimlXrNjI656dKNHLh6nXUaiXuqipEnYpcNoNnYobJqSEGxyf5/Gc/g1IFVe46jFod165dIxgMUigo2Lh5O8cOH+Lk8UNolWqOnz1PT+8adEot2fQ0t79rL6JKhc1o4jN//SnkZTnMKhOFTJb+I9eQ0jIEpRK5sohEil2PrF0u7v/B3vHFff/Bp9EKeoqJOK8deJWde7dTVKiYmZljfPIKD7/vEyRSMTyL8yAV8Y4vsKZ7E3atlcDiEqVsCkElUlbVRUqQ2HfbPTz91FO0tbXR29GG21nL5MQQfs8MYbWKnu42klEZ2ayAzWYhtDBLMBlHrVTT3tbDxNwEa1b2UO6sYX56ivrmFg4dO8XI1X4++gef4tSpU9TWrKKutpVXXnkNrV5OMODlwuVLLC0t0dnZTbFY5OCh14gk02xctxaD1cmd9z3E1OQ0G9auQy1XUma1Uelwc/HycQRKFAIZBq/NU8yCXKYDZYEHPrEDuTZJXimRKAZumuOzL/wzLmsN0aCDw0f2Y3XY8EcTuNwCYyN9PPqRP+LixTNkc2kGzl0iE0jS0tkKJRXRYBS9Vk9bQwPpZIxkNMzd972Pp596itbWVtpWdFBe1UIhn8QzPYlMqMJebkYtaslmBaxWE8HIHN5ICJ3aQGtLJ6FEhC0b1mG1V6ESoa61jeefewn/3Az3PvxeCoUCdbWNdHb0cvT4UdLZEAa1nr5YDJfLRX19Iy+99GuuZRKkclnW9K7EH0vy/kc3MzU+QFdXF0pBgdvpwmKwcubCIXRaJWlvkGveRUpZkCu0SGKO9//e9n/NMVu48eRrAOf69pMM5tDolCzMTDE+PYrGbGHlqtVcuXiQTQ9+gmeefpIN67dy9OhRitE8q9b1EgylmJ0eA0FDTUUF8USIiek5etdu4Y39r9PR1k7ntp3oLdWUWQ0szs+j1chxusowG6woRBVyhY5cZBFvOIheY6TC4UYS5VSVl3DXOJFKKSpqannhuVfJxIKs2bYRq8lIl2odG71eBgeG8XincJZVMDg6gs1m587b38U/feMfePiRD1Pb2oTDqMIXcvHu976Xkev9OJ1OjBo9tVXVKOUiJ04fQlRoSS+GuNbvp5gpIVeoKIpJPvipPRSUYeQqJelC5u0sA8t+C+/4C6qL89Os2LYb1CIB7xz7f/USVa5K3HVV3Hn7XUg5qKptQSpmmBwbpbqqilNnz6NSqTCZrLT3rieVSpHN5hElgXwujSCVGJ+ewuquQiZTUFVZQX//efr6LjF4bRS5WonWZCCaiqJQyMlmsxQlEaVRQ3NLJ3WNrRgsahwuB4V8kn27N7Fn3z6effZZasvtnD91iEgygl4tIaVz3Hnfezlz5hgrV3YgCjJWrujlzjvv4pbde4jmctgc5cxN+7DaywmHwxgMJhQqFQNjY4TzIvlEFt90gly0hNoAW95Tza53txBNL1KS5ZHkChQl9U1z9C/OseVdd4FaxKRXkw3G6WjpQNTI2bBhE+NDE2zdeTuZbByKBXpX9nL0+BlUKg1+f5C123az6F8kGAxjUGmIhAMIUglfMIDSbMPlclNT5WZsop++vkv0Xx6kJBfQW0zE0hE0GjXFYpEicpRGHY6KKhyuGjQGOQ6XE6WswH333kJ7VwenTp3CZTZx9sSbTMxO4FsYR5aT6OjqYWZmDLvdxMTIKB/64MfQ6/Xs2radUCpLU1srp0/0obGaWFpawmAwIZfLGZ+dJS3oSYaSBOay5GIFTOUytr63ip33txLN+ZDIUUBAVrj5+OyB/kvsvOfdVFY7cTlsbF+zBbu5jFgqQlNbJxdO9/Hghz+OJMtSbrPSu3IVV64NY7PZuXZ1gLaODi5cucjUtIf1q1YTjQTpbG+jgEQglmPNmrVYTEZGRvvp77/O1cvXyVNCksuIpsLo9TrS6TT5koS+zIbRZKWxtQ2lFqz2MlRiibvv3kVdYz2BQICgz8/i9AiiXuTqxdPMjc8QT2aIRPzUVpXjm1/kwx/6JKdPn2b96jVIKj3t3V388umXEbQqAoEAZWVv/RcxvxREYXSQWAoRWsiTi+Vx1uvY9kA1e97TTSIfpCSTk5UkxNLyOPf/aO/4lnskokIUJV597il27tpL54q1DFzvp8ycJxEv0lBjJSsXMBos9HR1E44l0YtyfOEgSDIGBoZYs24Li3Nemjqb0apVuNxVvPjii7z0+GPUtfZQ3VDN0OAENe4q9BYLFrOdUimHKMiYHBylobULpVZDNBXB7W4kl8uRSCRo7GimUMhjMespt5vZvWc7X/unr1PT3MDenTuJb1vJX33hb/AszbBn+ya8Xi9bNm7miZ/9lJVbVtPV2UbPmh5OHDqEf2kOtaaajo4OFhfnMMmVmC16UukYDTW16F0CkjmGSi1DUhRRmVTIdJClhBBMkV24+W3zcpmb4eF+3nj+GZqa27jl9nvx+/3Eo/N4F5e47dbNRNNJjFoTrU3NKDR6nGoVwVScluY2PB4vrW0rCS0FaVvRycDlMzgr3Vy6dIkX/+UX8B6JZC6JTmsh6FnAtH07Vks5hUIGtULk9LFTrNu8g4Igkconqaxs+Neum/q2egQBDHo1JqMGqaTg69/4J1ZvWMOGNWu459bNfPlv/wFBJbF720ZCoRArezr45lf/npVbVqNWKXjXu25n/8svg5AlvpSiZmUVklQEmYjJrMMcV9FUV4+/pEBmiaPSKCkpwWiUI2iKpIUSsnAeTcp40xzrGrbS39/HoZdfwmq1cvf9D7Jx43Yu9R1BqTawef1qUuk0epWRrFaPq76RZDRCPJtmzdr1LPqCNDV3o5QrKKssRyETQCEyMjTE9b5LDLW0EY4F6e5exeW+i2zevBmb1YFCKCBJEodf3c+W3beSymfJFbO4XLVkMhnkcjlVjUaUShGdVkQmFFCrDFy40kd5eTmuCjtrO+v5x3/6NmqjkjUrO1EoVei1Iq++9CqWKhsjgwN0ruri1JFj2Cwm8okCZqeVnFJBSRAwm3Qs+SWam9oJFacp6cNo9ToklYTeIKeozJIvCqgSImJS93aWgWW/hXd8y33znq1ks2n23n4v7sY6MrkovevXYDDbyZdEZBo56XiMM33nSSTT1Dc24vMtEQz4iUZCKEolpmdmsVhNaGQlZDIFM/NT7Ni2FZ3Njsmsp7O7m7Xr19HUsQqdTkM6myERiVIsQHVzA7YqB3lBQqszo1GLpJIxDHo9ckGAkkQ+X0Sh1lDIpWmsctPgcqPSivi9PkIxHycOH6Whto73v/9B2ld14mqqo7m5m7GxcTTI2b59Ow6HA4vFQiQYwO5wI1MZUMiVdLQ1s3XHLZgNZmSCErlCAUKJTF6BlNTy6g9O84svX+CV587dNEedXYfJZOB9H/gYKpOeVCZMVX05OoMVhcqITCVncX6aIyePY7RYsZrNXO0fYHRwgEQ8StjnY9Hro6qqilw8TKkEM/NTlJfZUJksDA5dY/vOndTW17Fy3VbMZiOpTJp4OEIhL9HR24PKogOliMFoQ6dVkcumsJjNqJVKsukM+XwRtd5AqZClssyGQSai0auYmZrA45vl8P5DVFdXc999d9HS04nFWc4DD3yQfL6AlMqyZ88eNBoNGp0etUKNzmBFbbAhiiIrutrpXLEWjUKDhAK5AoSSRCYrp5TQ8MRX3uQXf3eOJ37y+k1zjKQDWC0G/uBP/gSV2YBncQZRWyRflBGNZikIRWbHJzl36Tz1He14PR6GR8YZ7L+KRqsiGYuCTMDpdKGkRCKRIhINkM9mSBfznDh5lLbOFah0WtZv3YPJZCCeTBAI+JFKMlpX9yLTqVFoNVisdowGLUpRxGoxodNoiEdj5HJF7E4XxVKOsUtXUeQK2Cvs+L0eZucmmB6eoK62lj237WPF+tUki1k+9ak/prm5GbWk4NZbb8VmtYAg4rA5MJptWMtrEEWRTevX0+SugaIMmfyta0AUIZ2VU4woeeKvjvDTL5/ilz9/422sAst+G+/4lvvalWv49ne/yj333MfjP32C2+/YzrmLfezZdQuNHXayJQXhwBKtLe3UVDuZnV/EVdtINBhAJlNQ31hNVpLhDfjI53OkcxLZRI6sEGN0epLtu25jYWaGckcN3b3rUIsFLDo14XyaXC6HSqMjGUsSi8Robmknk8vgclcyPjxCJplDrVNgsZcTjydYnJtiMexn/Z7NVDodXM8kyRXkNJZZWL1hNeYyG5PjPu5/zz2UJIH73nsvhVyRTDrOli3bKUpxpmdmmJ2dxe2swF3lpKKqAoVOxwMVH+V7v/prIoUg+Wg5I5dPYC+rZPum7djebeTiuQssTI/fMMcPPPIIX//Hv+Xuu+/m2KET2EwadH47tfXNGJ1V5BGxGExsWL8Jh8PB5f4hetZtYHJ8GKXOSEtrI0qdgaVIkGwigUzUkk3kUCskfOEwK1ZtoO/sWdIZ2HfbFoqFJGatimAmQSqVQmcwkUvniEdj1NU3kysUcDidXOnrQ6PUYbbpyRcLRKMxosElro70s+/dt2KzmGiqqSFbEDCpZPSuWYnObCW8lOOLX/oLcvki+27fSzadI5tJsHfvrYiqLF6vHzErko2G6OhspSQUWKFWU9/k5vsvfomiFCM8n2Fu5ArOihruveMOzOU6Lp45z/xU/IY53nLrPp74xU9p61iBrKRgfm4cp7uS9tVrKMlk6C3lOAUZSlknMgn0Jiu7u9cwcP0iUkkgl01TZrMTikeIBYOUO6uZnhhGKZPQmUyUBCXTY0MoNTbWrF8PUgajKCOcTxKLxSgrK6OQeyvHSncthVIJuUJB3/lzqEUtDpeNQDDMUiBIKZtmYX6SsoYHMOg0JMNhkpkiRq3Api0bMGl1RPxpvvzVL4GQZ9O2jaQSafLZDCtXrsRolhNcCiHkM3iiYdasX0koGqJ7y15aerr44at/S6GUYWE0RGg+iNtdy3ve/y70RhmXzlxkdmJ5rPt/pHd8cV8q/ozGpiKvHDiAWh6i78Rxalq7+NmPf0BtfR27b7+HZE5iYmAESa6mtr4WpSAi6jQIlLh49jw79u7EqtdTkoMmq2dmaoQzC7Osbu/ixz/4NlvWrUFrdlHprsNVVUZaVkChVSNDhqDQMjU5i9FoxOv1sRiOUIj6uHTxKocPvEkiHSWek0iF/QhaC26nnvvuvRNBkhi43o9MBbW1NVTWNjIzNsHk7BzdK9owGs3kiwVEnRyfN8SKFd384Pv/hCgTmZmdxF1hxxfyg1LB5cuXuT58hcSChmTRjK5JxF1lIakPgSmG0aRnxyNrOPzijYu7N/EjelcbuXR9kHR4jAvHVejtLo4cOMC+W96FXJChUZsIBieZXgxT39KEoihj1doNFIt5Aos+rDodQrGEqbKSfD7PzNQI3sV52mpaOPD6S2xZt4ZETsGKlWuxlmnIKkqojXqkokRJJjI/s4DVaiUej+MJhRm/eoajR04xNzVNNBEhV5JIRMLobRWIijQmjQ5RLueff/IjFCrYsWMH5ZU1XLt4melgis7WRsrKykmmU5gsekYGp1m5sodvffPvKeaKJJJh1qzt5crAVapr6ui7fInRyUGMuQbmFqapatZRXZsha4pQ1IawWPXs+OBajrwyecMcE6Un6e7RoVHZOTn0JClHNfFYhmQqwcc//l/w+uYx6bWo1XpQG3BUG4iHQ1S4qxAEga0bN3F1aACdTofObMBgMPDsUyeIxyLYbdUsTI0xX6Znau4SDU1tCPIMqgozokZHsVgknS+RjEXQarUUCgWGJv08/9hP8fv9zI5PE0yEEEU5Af8Szup6krEwf2OyIMrlPPnU44hKWLN6HaayCn7wj9+ibctO2uy1GAwmEqkk1jITVy+NsWH1Wn7+2A+JR+Lk8knaO1o5fuYUzS1tXDj1JoFYgO6K3Zy6eJSaVgM6uRzBkaIkJqkot7Pr0fUce236d1ARlv17veO7ZWpqLOzeXcujn6tFJi8i8da6nNt3bEGuUnLpwlmKmTi37t5Gd3c35IuoNXIOHTqETCkiVysZHR/D7/cTi4UoM1lZtW439U2rCcbjfOFLf8Xw1Cw1DVU89vj3eOXF58iki8SiybcuXOXzNDY2Mjo6yi+ffpy/+8u/4NX9x3nupV8jiEokmY4KrYUnnx/CpDYzOz6LyWQik0hy6NQpLDoDnW3tZLNZ7r33XlLpJc6dO0c4soQkSUTCCcorakDM88iHPsYjH/gwt9xyC4loDI1Cyekzx0kkI+zYtwdnXTkGm4qwP0haUGOng+d+cIJfv3yaiCd/8xzrbKxZY+K2D6ipb6hi/frVNDQ0sHpNLwVB4szxo+QzMTavW4XT6SSyFAShwNGjR9GbTSSyaeb9XlKpFJlsDHlJYNW63WgNbryhAJ/67H9lYHyK1etX8Z1vf5n9r71CIp79H3J0uVwcPnyYf3n8Z/z1n32exaU0g2MjaI0mFBoLDrWRJ58bIrQUIeKPoNfrCfmXmFnyYtbqaW5rpVQq8clPfpLhq2cZHBwkFg8BEArGcLkbKAhp3vu+D/HxT3yarq4VlPIFDGotJ04eYSmwyPqtm8kQR2OSs7SwREpSYCm08Mz3T3Ho2FWCczcf5VFda2bVSiVdm+epcNp48H33I4oiNTU1pPJZXnz+WdQGG401TpLJJPFAmGDIx9TUFFV1tQTTCbQ2MxqNBoQcizOLbNtxN4m0kqiswIMfe5T+gVHWblzJD3/495w7fZJoJIUgCP86TNdkMvHMM8/wgx9+hz//4z9EphIZm57E4rBTKGkwo+Gp50eZnVkklUohl8uZn54hK0jolWrWbFwPwCtv7OeVZx5jbGyMSPStqa7DoTh1De0E4z723no3H/7IJ6iurkYmgUah5MjRN8lRon1FF1dH+tBZlPjmlkiWZCijbp756THOXZ7CO3nj/36W/X/jHd9y/8afjXPrA7UsXj7EXz75AEdemcE/0E9LUwMbe8qRG7sYvXIJ06atBJY8eKdm6d20FofZTDaZorOzkxISk6PjpBfjyGVaVq3ogXyI40fGmR6fwWoxEg9FuOfuB6hwVJKMJLBYy0jnMwT8Xrx5Ca1aw9z4FAcOvczk1CgOpwOZQonGqAWpxO+9v40MRmwVFSx5vKT0Ibau3YLeYmLTjm3MTUygN9v4h7/5CgVZFq3GiNtdzcaN69mz71asVitqlQQyBZJSg93tprzcQfT8aVauWc3+/c+xFFhkdHAMUVAjlEQms5dRKg2Mng4Qnz110xx/9CUPO+52MPvKm/y3Jx7iie+fZ23tevLZNKIYpHr7djKhJWQ1dcjzWTKJJLbOViw6HdFgCJPJhFKtIuBfIhAIYC0rZ9WKHsL+CY4eOkwpU8TlLCfsD/Dw+z+C2WQj5AvirnKSLeZYWlikJMmora7hscd/wcVLp0mlYxhMBmRKFUqNkXwuzac+2IZKpcRWVkHI7ycU9LGuax2Icqpqa7h28SLZIjz12JPkhZ+i05qoq2vgllv2sm7DJoxGIwqnBkmQ425oJpUKUVbmgJEB2js7OXn0FQKLcyzMeVGgopSXMZn3o1ToOf/qJAtDkZvm+Nz3C2y53cqVAy/zp499iD/54D/xxT/8a/rOn2J28CB33n0XS7MjyGpbMOi0eANh6lubWTpxCt+Ch/nZBbRGPTKthojfRxGJ5uZGOtureebXb3DPrXdhsRrJJvPce+/DKOQqgt4ACrcdrcnKwtQUmWyJ3pWr+Po3vsZSZImDB4I4Kp3IVVpMtjJi8QC//2gbokqB0WYg7POzODfJmq61JDJZdAYtF06dwpcI4Dt0lINH9qPVGGlqauGue+6hu7sbp6uGTCZDvijRu3Eb8/PjlNmshOJhzCoFA5dOEPItEFyKIApq8hmJ6VIAuVxk/+MXcTdY3r4isOy38o4v7ll5kad+eJwHHv44i2em2dxRgaKpGaGkpVBMc/bQc0RzJb7zza9z77vvQVKrGBufRFDKGOofpLqhjsErl9iwZSvBcISB/gtUuevo7NrGiy8foZApseeWuwgHl3BaK0hlYhw7fAK3243D5aSqppqFqXFGR4epqatl2+bdrF+/nhef/xX6RhNmo5FYNEVN21aKeRWB4BCiKHL65BmyhTzyWIJUNsOd992DTK5izZrbmRs+zR0f+ANOn3iBV14/yPXhETp7VrFjyy5K+Rxr1qxjaX4Gf8jHjh3buHTtKufOXiKXLaIS1GQlAaW6iEIQkQQwGwwo9OJNczQ6yvjVT86w+5b3sHjWx/37uon7EtitVaSSIY6/+QShVJ7hyXnWrl9DSipx/nwfzmoXIV8Qs92Gd3aW1vY2Fn1+BCFNOBymvmENZ84NsDCzSENLB/Pz86xds4V4JsbA1Uv4fLWYbWVUuCpYnJlkfHyUtrY2IokkDoeD+ZlJ1CodDrsFv0+iunULrpySQHCAYqHE3NwciXSKKkslqWSc933g/cjkKjZuvJupa0doXXsrvqUBfvb4Lzl17jz1za1s27QDrUrJqlWrmRq4SigWoaOjjQtXLnH0+DnyuRIqmYp0SUKpKSET5AhyGVaTCbn65hNe6crM/PLHZ6lrXM3iyXn+9r89jHf6Kru295CIhzjyxpP4wkVW77yL6moXC6El/BdC1DU3YtIaWbGql5g/iKhVIopyqqsqmJ+Zp6FxLe+5U8Pk8BRGazl9F87R1NhOMhshmklSREKj0VDmqnorx+lJuru7GRobZ3x2Fo0/iMypoarSgWdBwFHRQmVOQTw0SakokUymKUgl2psa8XkW+OgnP4GEjMbGzaT81zG5VpArRfn+j/6Z1pZGelaupq21C7NBj9PppBAPkcynMJuNnLt0njcOHKOQl1DJlGSkEgqNhJQVkMvAZLIgU73jS8t/eu/4V2DtPg2Our3oEwF+9pM3+Px//TQzC/3IRT0iGVx1CjyXxmkoL0MvLzIRCpJPpBH1IlX1bubmZmht62B6ZoLKSjcyhYbFRS8btzbx+f/2l/z6xRc4ePQNPvDIx/i93/s47334QarqGjGZTJhMFoaHh1ErVdTUVzE1t4BSkJiemeAvvvR1/uYvvkBtowJBKJFJFlArkvS09/Dir3/NrTt3EFoMcOd77+GJf/4ZTW3tqFV6rl/bT0FQcuDl55Hr5RRKccbHR/F6PQSXPOzbtw/vrJeR4WuUO23MT83x0qvPYrCpUKjU6PRKJEGGKIrI5EXkGgGVooRGY2bw/OwNc6zuCbPyjl2U5oN89e+e4ntf+zKzkQvIlSLJiJeKKomipwDx8+jlq0jEolSWVeD/zXjxUqlAbV0ds3NTOJ0uPJ4ki4tedu7dg6umkm9/65uEI0t8+NFP8Nd//UXe89AD1De3kU6nKS8v5/r16xj1BkxWA8lCFimXodxh4+EPfIwv/9VfEtckUau1pBNxNKKMzuZuBgeuU+NwM2ucpWttL1//+69TXd+A2VTG5UuvgFLPyNAVFHoBSDE6Osz8/CyLC9O85z0PcOnUOWLRAGqdyMjkMAcOvoreqkSp0aLViUiCDKVSiVxRAmUJnVqGKOoZPDd3wxwN9dPctWsrubEs3/3Oi3zj776IV3aZbCGOPziJsyaPUqMjNPUaNRXvRSUqMKp0pDMZTpw9yYoVK9HotFy5eoG1Gzbg9wfxen30rl9Nmd3CCy+9SCjg5d3veYgjhw/S3NGKzW7F5w3S2FTL4OAgOo2WbCGJzmwkHgqwbeNq9t5yPz/98Q+IS1EMRj2peAyVHBrrq4nHohSSBaQCKM0G/ulr38PudEFeYtHTh9JYga6YIptIUyolGB4exLMwx7nzJ/nwhz/K3NgMsWiATD7JlYHLnDi7H4NN9f+Xo6gEuVhAoxUBFUPn59/ucrDs/8A7vrhbVyhZq4Z/euPHLOnS/Neffx6zqKPOWE+Dvpodd9VybNBDYmoeR0uEztY2FgNe6hvb8fnnMGjUyEQFzS3tzM5O09axAs/CHFqtFo2oxGS2cK1/iLbuFTz8kQ+yvmc1g+PzqDVFrl2/zqWL53G73VS4HIyMDFPX2sbo0ABf/fJfsn7LDqbGh9ApoaqiEpVKTZWrAo0oIxBK0tzayujV6xw6fpq8Qo6gEnBUtuJyO5mZmiUWiJBMZjHplYTDPo4dfpOpiWFW9PQgKeD85bMcO3MIg1GDVmvEYNSRl2URlFlUShVqrQalUk5RIVHMZ2+ao6EZusUMX9j/XSS3iUe/9WlchjJ63KtQxgT23NvMyP5TlLwCx/uGWdfTzWLAS/eKNQSCHnwL81TX1uN0VzMyNIiEAoPFiiAIVDldNLW20ncuzOoNm5EpBeora/EGk8jkRU6cPMmVi+fZuHkz0WSCkZFhbK5KFubm+emPvs3HP/2H/PTH30NeTON2unA4KjAbdCx5fEg5AWdlFd7paWY8PgpFgYwE5a4WGpsbmJuZxxeYI5lMYTWZiUT89J0LEA37Wbd+PSV5kYv9Vzh4/A3MFi1Gow21RklaSCGqcmg1GtRaLQqFQEkuUSjkbpqjrVGgRRnhK6efI2nN88g/fpwqk4NtnTtYGprj1gdW8KvXXscSreflw+fpqKuloM5jM7rp7rESWlqisrKS9p6VDA4N4fMt4Xa70Wq1dK9axeTsLPNz02zesRuNToVZZ8JgcpFMDnPi5EkiQT8rVq4kmc0xPT2FqdzBki/MuVOH+eu//Spf+NwfIWWjuJ1u2ts7MOpFrvZdJpcrsW7DJnLpCFlJQq5QYy13YcpmaWiqZ352gaAvSiQSw1FmZ2lpEb/fw5OP/4TuFSsoygpcutbH0dMHsdr0mM0mRKWcrDyDTJFFr9Oi0amRixJFJPL/m9/HZb97/67iLgjCNBAHikBBkqTVgiBYgaeBWmAaeI8kSWFBEATgm8BtQAr4oCRJl37bEyxfkKPc1Iha3sCWNQom4/MIMiX+godw0UP2VxK737UGRVyklE1z8vxpamtricVDyOUio3Pj7Gtt49TJYzTVN6DVqTEatKSTOTy+RXZt38H997+X//ZHv49MLaOivAIpl8DjC1HucCHItTR3rMBi1lLIZ5mb9/Hgg4/SsaqTL/6Xz6GyOynEl+hqb2PLrh3oVXL8s15KQgnvfIhIIsmf/9mf8MQTTzPr9RAKhRDVRWKxeVLpPIIckvk0iUSccnsl1dVu5hemaWpu5fzcHMV0mmA4wVJRhiRXoNdqkYoFEgk/pVIJSZJQqjSIppvfxNRYNHLdn2DdqjtJFZfw5H2IGi2Dyeto1QK6A2bWbW9Gk9EyMZzh2z/8Lvfffz8z0+OkMmmCwSDV9Q0cPXSQhtpaqhoaWZyfJR5NceXqBapclXz0x4/x7jv30tHdxnse+BATQ2PYqsrIFAUclfXoLXbkooLurjZi8Qwdbauob2vgv/3h52hasYrF0ev09qxg+97daBTgm/IgapUMDwzjWQzz5b/9G77xjW8RTScJRz3ML0AotEg8GkcmL4EcoukkDpuLykonI6MDOJwVeGdnkbJZgjNpAqUIRUGGUiFDo1ITifj575PnqdRqFMabT6TXbSnjl8/2sWl3L/MTAfyyIAqtjhOLx9CXKTh/2M2eB28jPb7EoYMeXjs0xHvf8wCjIwMUpRIauQxBriAY8EKxyKZNm/D7FkknM0x65hgZHuZzX/hLPvTAPXT3dLFp6x6OHT3Jtr27OHb6PPXV1SSyRYpFiSp3BU1NzaxatZlkLs4ff+oTfPqzf8YzP/8ha3tXsffWWyhkM3jGp3BUORkdGiWZLPJnf/I5vv71f0TUqhn3TzE5lSUSXiISiiAqBQxmA+l8Ao3KiMGgY2JyDKVSRsjrhVwO/0SEJSFMAQFRLmDQ6RkMjrwVkFRAqzeB7uZr+i773fs/abnvkCTp305g8qfAIUmSviIIwp/+ZvtzwK1A028+1gHf/83jb8WxqcjoeSvjl0Iszpr5+Jcf5I03X3mrhSBJ9E2eoj2wlc989Uf8/l9MY9XcytTMFNcGrlNXW0lNhQ2/b5HLV86xalUPpUySSDCMpkOHw1pGRbWbj3/8wzQ11rN+/XquX7/O5s2bEQQBpVrDtq1bkSSJM+cvIy8WMVjtDE+N8A/f/Dseeu8HSOUS7Nr8Abq7GpCpRIrFHPnRGYr5DHmhxMoVXcg1ejbu2UbD7AKvvvQ6c3NzFCQJq8VAKpMllciyd9duZrxTiAYlRpOZpcwUgjtOV60buVzO3HyQSqONS6cGkZIqipJEUdCgVhTRGuWUEG6ao6m9SPDQNk6+9lUErZIHP7eL/sHzyC1KihQ5PnaARxa38aPXXmbzu3rpaV3L2MQYcrmCfC6JEhmRwBLBkIcNG9YSD/iIBMNUuGqoc1ejt5n5+7//S7bt3EJzawfPPvssn/3sZ/F4PGCXE7daUSqVTAeiLPpirFq7hjMXzvLFL3+BL33x78nlk7Q+8m6qKssoCBKSVMDv82HUa9AY1DTZ6kglstR2NKEWFOx/LYxv0UsgGkFJCVHUEo0m2b11O0sxHyWlRFt7B+PTl6EyRmdVJaIoMjcfxKE1ceX0KIVYhhJv5aiSFVDr5ZSkm/+R1LoVtFa+jx/96IfkM2F2PtpAQYijsqopSFmOTx4k82ItCVGOoSGHWbeDi/1X0ChVzM6MY1SZeNed1RRySZpb25kcGWB6Zp7OVavIS0ne9/D7ePbpn7NmfS8GYxlvvnmIz/7Jf2XR6+X2ve8im0sQS0XQm8pYCkTZuns7rzz3NEMTY/zd1/6RsdEBvvX9b6NUFMlTwqI20efxICsV0Jm0JFNx/IsBNFYTBpUJo8ZEPBbH5w+RikcoK3cQCEToXdlGDpDrlFhteqYXrlJyRmh3ORHkKvy+EBaFhmvnpoiE8yCTKEkalAoBuaqEwPJiHf/R/l1T/v6m5b763xZ3QRBGgO2SJC0KguAEjkqS1CIIwg9/8/lT//P33eT5b3gSm+52sHfPNnq72/nuN7/F7fffQmw6xXh2CoNJQC6YGB6aJzY3zS13bOLAj0tkigU0OhMN9eUM91/H5qqnd2U38USRbZt2INdoETLgi/mwWa34fIs8/diP2LLzdsKRJHa7A7lcjrOygj/6vd/jk3/0aUKeMfbsWE1lvRtJkshKebz+IPUNNczO+8hmkhRKecwWHZFCFIQsMkEgFk2TTCeJLcUxa/RE5xL81T/8Mz29ayhlUrjcLvQWHRl1iDxx0tkMao0WpZAnlk9TkqWRy+UIJSUFmQydKKAUJaRigUI4TzYuMTrkRUKJZyJ0w9dwy11V7Ny9jlVdbXzvW9/kjgdv5+SvL6JrVWM0KxEkHaMjHuSJOBu3tfHqd4okS1k0GgMNtXbGh0dw17ZR31CLzx+ld8VaaptbIC0xOjdKhcOBz7fAC798jDvufT/FkoTfH2LHjh0Icvjzz36GRz/+YYYuH+fWPRsxlZtRKpVkpTyxRBqnq5yZOS+5bApJKGE0aQhmQyjEIkpRJBiIEUtEyMYKmNQ6Tr1+lpcPXmLrzl3452dxVjrRW3SkVQGypRiFUpF8oYhBLSNRzCLJM8hkMoSSkqIgw6gVUcgkspkE8pSM6FKa6YkQxZKCxckbL9ix591trNvUTHttBz/7xbe4+5E7OfjEGWyrDBhNKqSCgcX5RWxqEzVNRl75Zo6ElEUhE3GW6/DMh+no6kSr1ZLJlTBqLXSsXIVGpmJmcYbm5mauXbvM4ddf5OEP/T5ldjsnT57l3nffyZXLA5w5dZT3PHAfLz/zE+65cw8asxqDyUheKpFIZzCZDMwt+Mnn0sgUAgpNnmQxh6gsQUkgHI4RT0YppQX0opoffuVHpAQ7K3tXseRdwGw1v5Wj3E+yFAGZQCabxaCRkSzmQZFFjgJQIJUEdEYlShkk4mEM6Bm96CGSyFEsyW6a47K3zf/dfO6CIEwBYUACfihJ0o8EQYhIkmT+zX4BCEuSZBYE4RXgK5IknfzNvkPA5yRJ6vufnvNjwMd+s9l7o2M/9OFVkJeIlBLc9fAWjj93HXuTC6tVjTcwTS6XwqKtZWhqiLaOauYvLZCcNdHY3EAiFsfriWIyqqlyVhDO5HHoLMSzSgT1WzeQ6ExOmpoq8S8FicVidDa3gixN2D/Pr1/ez569D+KN+lkKJOhpb0GnV6DRaDh15gLXr/dTX9/Epz/9KUqFHEIpTzwQIpKIQy6Po7oSjUFLOJQiFg/i8XjoHxxkYu4MKrkFQS7j+uBFBDGHXCmSlgLozFp0WgPpUhalUkSjfGvKBFGUI0kSCrmaklpCrVKRTOQoFAqIyhJamYqnv3r6hq/h+z64CpkEGVWOTfs6uPjGDO7OGlTqPP6Ah3whiUVby/TCJDV1VkZOzVIudGJ12BgbGUYhMyCVMlS7nEQzBSpNNtKSgSwibrebaFJi7dpOzp0/j9VahsNiw+W2MDc5zLm+fjo6tyHXi0yMzdPZ1oROr0ChUHDx8jXGJkZxOir5/d//NMV8llI+Q2jRR14mQywU0NvMGCxGYtEs4YifwcFBrly/TiQ1ilpRRjKdYHp25K3CoxRJEURn0KI3Gkjl0yiVImqxiFwu/muOcpkKSSegVqhIJDIUi0VEZQmVpODZr914KoeHP7Aa5FBZq8VQY2LsTAh3Zw0yYixFligUU1gNTSz6ZzCZBcbP+mgwrUVj1HL86BFa27pIxxPoVErUJjsmuYjV3cbUvJ+167bhC4RoanLxq189S2tnFzatjp5VrUSCCY4cPYBa48bksKLTmRCFEnqDiEJp4Pr16yRSMTQqHR/5yIcp5tPkM2kWZyZQG8oRMnGUZhMOVxnRcI4FzzRHT11g0TNNpjSLTuVg0TdPMOyhQJpMMYpMK0OjU6Mz6MkUs/8mRzlKpZJSqYRcLiJp5ShlIslklmIpg0olosjD89/ou2GOy942/9fzuW+WJGlBEIRy4E1BEIb/7U5JkqSbtb7/VyRJ+hHwI7h5yz0VTRIuRWlobiVTlFj3kI1nfnwCpcyFy67F50syGBtAU1bi3PGrVFeWE5QtsXBmGrHMRE9FA5fODxKLBshmc+ScVmyWamYmxmlubieUDHNdusTazg76fT5GxjW0r+hENMrZuGUfX/zSn9LU2IbVZmfbhg60dh+5wAxmXYk1q9ai0ejIpfPoDWoGrk7ywx99B/JF3v/+hxg6foJbbr8TtbZIUTJR1WjGUe0g/lIMq1nO+Qun0RnzyDQyJKmAVNJSqa/GF5hDb7SyOOZDrSmARo9GqwBlCaNOjqokkcmDVi1SKEBJEEFx8/iTsSQpMUWduxmNyciaB0289POTSCUrFWUi0Uie0cgI6OIEvIvYysxcH7pGcTyNYNCxrdXNmZOzFPIJkskMUmU55WU1RAMpdGIevdGKP3KOnZscXO5bJKzQ4FI2Yqpow16R4Nev/JKx0Sn27L0FmcyFrapILjBDjctAIVtHQ0MTQkmGWq3kjYPH2H/gFURk9PauxFJexs49exFVRay2cjp7TTR2NPLaG2+gUKTovzYO8hA6ix4oIKCjwd7A8MQ1LLYKfDNL/5qjVieCsoRGUUInyckqQa9Vks/nQa5CIb95t0wikSCnLqAuuKh1lWN+UOKln52hUNBTbpWTSpYYjV0kU4phM2jQOtScHzhPKZMBkxqXVcmlmSTysjzhuTBZs4F4OkauqGLq+jmUeg1FeZqPfmAzp09NoXPVEk0KiCYLibSAqEnzta98iUc//AmclQZam60szQ7SXFfOyKREd0c3QkmGSqXih9/9Z+bnRtGpDRRLOW676x7MNjuiSkF1fSN32SoJRqY4c+4C4fAMBSlGtuDDaDehKpmQxCItFU0MTw2h1BoJz8VRqbPkRRlGoxFBVUQtL6FDQU4poNOpyBflyAQlKnXx/6QcLPsd+HcVd0mSFn7z6BcE4QVgLeATBMH5b7pl/vsingtA1b/5cfdvvvZbCcTzdN7VRWO7Fbu1yL98c46Guhqaa9sJRr2kpAj+8AihQRkVVS6uX17CoC0jJ0vgbC6gb5CxZ/tqasrMaHRa8po0j3+2j0c+dytOk5KTL0RIzqf5/pu/JC+pURvH0WgEjh3Yz4o1m7j17s3c83tdjC5cIpEboBjopJCvIBS9gNmUY3R8DNmbEvfefSf/9PVvgkpAAfzsyWdIZ+L0rN6CwaQgVyjhXZwlwxKzs+cZHImh1umpq6lDKsrIJiIEEgtEctNU1FkpUKLeaqWEQCaaQimquXCin2xEjqwkgrKApMiy+/btQIKk8eZ97skiNO9po7nLil6X4vXv+6iuc9PobsYfmiOvSDEyOYpsAcrdTiZHEuh1NuLpPLUrlYgNRR581160Uh5RI4K+xM//6Ayf/Kt3oywl6D+YYeZKiP1DRxBEEzL1JHqdyPjAdZR6K2u3t/DH39rBhOc6peQCC5ONxGO15DIpzOY8l66cJJ1LcMftt/LKy6+RkfIogWNn+4gnovSu34pClIglUngWpgmlppmYOonJZkSmlWipakOvNREL+ggkFgkmx3A2WJArJOrLrOSLJYqpPHJB5Oyx/rcWO5HUSGIWQZln3527yORC5Mw37ysuKJXUbalhZZcJUZXghR/PUV3vpLaiHk9gmmI4yNLADBqNmlhRhT+RQmO0kCxB/Todqk4TD7ynEUU2DSpQGZV88yOv8Vc/+CSjF/pJemoYPDXMpYv96M0urg5OU1tbi0VvxVlRid6d4ceHPoYnOIkq3Epo0U7/1Wna253o1COc7zvKot/DXXfczvTULJmSglQ6Ql4Q+PZ3v8eP/vln5PJpYtEEnoUJ+gZPMDLch6AUQAPNna3UuuuZnL5GNBHGnxjHXKlBFGVYKkwk0xlEmZpSpsiZQwMUE0qUMg2SmEWuznPrPXuJxX3krTe/72LZ797/trgLgqADZJIkxX/z+V7gr4GXgA8AX/nN44u/+ZGXgE8LgvBL3rqQGr1Zf/v/Tu97G6lpU9LldvDznx1h5apOlGod/vQQKqOKxZkgFm0ZOoueYDiF2apm912bGe3r4+Cpy/SsbmJ2fJ6N3bWcfm6U3tvdPPCdTtRXjHzpy88iF2RUO6w0dzQxOjHIvoe7sTSPsqvDhL0gMfVUkcjFSsz5Woanp1jdLcdaZqG6ykUyFcZZZuTsqcP09vQiUytpaO2hlMkwPTVEOBzmwJtv0LmylWQ4jD8xwcnzv+ae+z7EY7/8R/LaKP6UkpKUpyBJpJAhL0jIVWoUhQxKuZrK+lo8QQ9ytZwPbb6fSCBMNpRAKCp56ZfHOPX6JeTIkcSbt5S676mhtlVNV7WTx39+gp6VHZRkCvyZUUSTioUzfupcFVCQk0rmqXCZ2bhvI+f3H+Ho/lFa/6SGUydO89A9W3n1hxfY94mVPPCdTq7/coyjJ4eRAeUmLV2rVjA2OcDtH1mByjVMe1cKW66Ob3zxJbpr/wtV+lXsP3SIO2+xYdQZ6btwGp1exKJXcubkEWSSnKJcoKVlHalImLnpUfx+LxcuXsBWZiAeDpMXQ7x+5DE6u7bRP/YGeXWKcF7NUmiaQkFCodOSK+Qw6gwUMwm0Ki1VjfUshhZRKGV8dOuDeGbnUOYFMnGJN54/w/FXLiCTZJQUNx/l0Xa7i+omDbWVep7/5SCrervIS7K3ctQrmTm9gGQAg8ZENlWkpa2O1jVtHHr2NV55bo7eNb28+MJLfOiBXfzqaye478+28OGfruHvP/ktMnk9et0oKlmRNRs3MDJ1jYd+bw8FUxwpGscgufjOl55Gm38Qd9UGLly6yL7dVhqaWxgc6EdUSqhkOU6dPMjxQ0dBFGms7MC/tIB3YYLx8XEuX72MQiwRDYfxhgYYmztNJq1CoQmQF5IkBT2XpuZAkmOvriIaC6LXasmnYmiVOtpWthBI+EAQWLH9IeYnptArVAQWkxx7/QoHnjuDiIyi/ObTYSz73fvf9rkLglAPvPCbTQXwpCRJfysIgg14BqgGZnhrKGToN/3v3wFu4a2hkB/6n/vb/xfHuOFJfOPcLVSVV2NSKfjmF9+gqtaBGMrQ3LaBWGmcZFzL5YEBspE4GkuB+fkC733fPcxO+Zk47EO/qsgtd6wklojT6Xaj0bqZ9J4koZV4/W8GeeAjm0kr5TQ4Oxg+PMuLL5zGHwpTXduFXC5SU12Hw2qnzOWis3sF03MT+KeXaGxroL//PLGwF5VBw8xEkDmPn2w2i16vJxEPE4/G+NTnP8PkwGXmYucoyASKYprJU7M0r6tBYzDgC06iVKgoFAoUyCEgopCZ0WgkFPoSer2edDpHvdmCodZBOBzGqDfhdLnxewMMXh7E7rDy0+8+j3/gxoXpWxduo6q8Gr1C4PtfOYLFokETL9HQuga0fkIBOaf7LiElMthqlETCMnbt3cHClJ/JYwHqbyujZ1U1gqKAXdJgczcwOnuMglHLq397hbseWk1Bp6W9agXHnrnE8aMDeANBautXYjabcTnd2M02dBYLtXVNxNMhLp64xIrV3Rw69Co6tYDOYqCU0zI5s4DX60Wr1RKLBkjEEnzkDz/NwuQA3vQVErksqHMMHhtl7a4ektkMicwiSoWGQqFAXpZHVhKRC0Y0GgmVSUCr1ZJMZqi3mDDUV5KMxRFFFS53NSFfiOGrI1jLjHz/a78iduN5w/jexTupdlSDTOLnf38Mg1bAXNLjqmlAoU8QCsG1K1NEQ4vUryrHbHDhrqxmZszLzNkovQ/ZcbucGKwq5EsFrNVuZpf6EK0VnP7edVbsqAKdkVZXJ/v/5Qx9FybwBkLU1K3A6XTSVN+E0WAhlkpT39BENBlgcmSS6upq3nzjRerrK8hToqG2hzPn31r9CyAU9BANx/nEZ/8L8xPXCBdH8GfnUIp6Rk6O0bujh2AkTF4KoRK1FKUceZmEkFMhoESvlaO2ylGr1WT9EWqqKjDUO0lFkyCT46ysIhoIMzM+jUaj4bv/8CzJmZu965e9Tf7fXSD78fEPUW10MLM0z8QhGWoRorpRfvJfzvKnX/0v5MUpvOMJLg+exlFh4e73P8jSaIqJoJ9T+0dIecJ85qfvQidVoVJ5SISS1NibuZ6epU5r40/e+xTRcIZipoBar8Zqs5NJJxBVZThMZdS3tSEiZ8e+fZQkOem4n1CySHtzA7NzU0i5BLl8CZXOyfFDz3LkyBGK8hyCTEnAH+Rdj2xDZ8uSCMg5f/wsXZ11mBwuYsoF1CodxUyOTDGHXC5HpXhrojNZEWTyIiWZiFGtxWiWk9MI6NUqZKkcKrmWhp5u5MUc8byEKIooZRLv6fnzG2b8xMSjODVWhmZGCV2yIYp5MqY5fv5Hx/jQZx4lmh9CXTBzpu8Yrlo7++69ndBUgZngEmcPTRGc8PCH/7wPh6YdtXyetJDBIavjenqWKqWFz73nKaLRNLICqPU6yiscxGNhVJoKHKa3FmMmX2LV+vVYbQ78nilmfWG62ppJpmKUsnEKKMjmRPovHuaNN96gKM+RyxdxOty0bK5E1MZIh5WcP36ers4aFEY9BWMEtdKEUiYQScaQyf773ZIa5FIRhQgF5BhUGoxmOQWdHK1SRF2AQl6grXcVQj5HPF9Er9dTyiZ4oPeLN8zxqYkPY1UYOXFyGH2yCrkyTVbr4fHPn+eOR29lMXSZqkoXZ4/3UdPiYu//r703D7PrqA+03zr73Zfeu9Wr1GqptcuSLVve9wUDxoQ9+AtJIAwESDIwBIZJJplkJiQDgYSwJRBwIGyOWQx43y1Zm23tai3d6n3ve/uuZ6/5oxuPyTMR3zAGKea+z3OfrlPndj/nvH3P79ap86uqO25g4kSViXyOZx4cxB8s8ba/20ZHegsJbZpKpEys2MTpYAbGDT75+/ewWLKJSAszbtHY3MRifoGE2UxzczPrt2zBsX26e1ezek0fI2cGWFgs0dLSgmlpRHVJruCwkLc5eXQX37z361iWhe14XHXF1VjtLsIokZ8KOHP4CM2tGazGekgUMPU0MVNjbnEBQ1FRDB3diKGJpQf+oRTEDYt4WlnyqCsYFfCkYMP27YRVh8DQiUajVAvzvGX7n/w/xYUa/7/497tAtqFG8T340hfvZ8Vilouv2k57eh0f/as2PL3AkUNnSWkWLtC7oR0tHjJenqFtdZybg0v45J98nY/ddQ+/99/voGNVA5GFLKebZmhL1TNf8bnubc309K6nNG9RKs6TTGZRNYO5yRGOHDvNdHmWO27+Hc4On2JqcpLujk5as+0sTIzT2d7BzMQ4q9etRAYW3V2/ixlroKWjhUcfvZebb7uYQCshAwijJXa+cTPDu8bRm6YoDRsMDJ2kf+dKEmYcO/RBVzGEiqZHCaNVpBGC5lNRBIqn0ZhuJedOk2qMMjZ5BMeZoyG2kvq6Pmz13BN86oqJplp87579tBezbN25mfbISt7/x40EEY8zhyo0RDWMWILV61agW4ITk4N097VwyebVfOHx4zz5z0UuvnGcVFNAYrSZ0/0zrEjUkat43P7uVTS3dpKbUgn8ColYGs20mBkf5NiJs4zOz/Da297NYn6BwTMn2bp5I6aepDg7Q31bC5WCQmtXN4W8x+reLoSeQhiCI0eeobsnje0sokUVRKLK5W/axNlnJkilAqojEcamR+m5qI1MNE3Zc1B1gakKDDWLZy6iGZJAOFTVCIoL7dlOxmbO0tyRYXzqKNXqDPXRHpLxJgq6+TM+jwbxeJSho7MkClNs2rGOJr2V3/7gTejZKLMH4oQhxNN19K5tJfAlh0ZOsW79KurjBo8NjxCMvp05r8h8ukj8TBP53iJtaj1TLUV+/Q8vJpVpZH5CEgZV4tEUumExeGaAkZFTnJyY5JZr/z8QOkcOHmBtXy8Tw3upWibprg6qlQpd3atIzJXo6V4BkSRjY2NUKpMEygz5uTKZxijRRoUNr+7j7KMziFSOmVMOUdOhZV0dDcl6Fp0Kqg6mKjHUelwzj6aFOKKC1GJYrqAzu5KzxZOs6GlmfOo41fIkaXUjjb11uFbDyxcEavxcXPDBPWqY6HqAMtvAjx4a4fH7T9KZXcVvfPQiCoZHzIDuvixDOYuKGxJOCbKtCSKxkJlSmWhSpzzrIQ0fZ0pnRW+WQPEYP7NIc0cD17z6DfzTFz7PZdev4+/e9RyGGZKKptF0WCyXiCuC+71dxNIW2Xgz9ak69j3zCHUtLUhF0N6xEtcOUY2QxWKJ/vVr+cSn/pqKM0E1XBosEqnkqd/SQSE3Tyii+EIh3hKydfVaJofmmDydZ3h0jLfe9SrCqI+iB+hhDCeo4IkQDQVhSEbdcaIpmFiYoKNuFZloiva2ZhaK0zS7F5/TY9xM4Psl/Ik09z0+zv3/coSMUcf7/+pKjo+OEtFCWldG2T8wT8nrxh+V9G/pQig2Rw4fJJYyeOCeR9ly02sQhRU0dNeD4nHm+BQdfZ3suP5m9j3xJNl2nc984DiaWiUWSRKNGSwUSkSBgw2DCCNEOjpOqcLBfftYs3E9IghZ0d5DpexiRU0WCwus29DPX/zVX1K2x3G1dmKZBLGZIvG1DeRnc+hWgkgqQiwtaVrdyeipGWaGipw6M8q73nMHtlHFF2XMMI4bVpGagGWPg5WzRBthdHyYjsZVpKIxujpXMJMbxcyvOqfHmJnEcUtMjuV4+OE8P/jW8zSYjfzOX17Kw489TX0mTms2wXP+HOWwC6bg4iv6ce0iB/bvx8rE+PM//CT/8e9up8PqobUtzbTuMXBwhJ5VK2m6rIPje/eTboPP/sFRUMpErRh6TFKt+MSEyqHUEJH4GHZJEtM0rGiM+sYmLMMgYkTx3YBoPEKpvEh//zp++MMfUSiPsGZTJwktgTvuIDotqlNVkk0NZFp0EvUVdNPg7IlJZs+WGJ+e4Dff+TpKeolAK2L6MXzNQVUFQkqEFTBYGsJq1jg7MkhH40rSiZW0tmaYWxzCmawF9/PNBT+fe1SPMOOUqIs0YSVDjOYEr35nF8eKZ5g/u8jNb91O3QqTG+68mdbOFRTzZdINOhkJf/vJb6IqcXRVpU6spVxwmPZsInaUeGuSSFXDzgf89jvfy/fueZoPfX4nQehSrtiUyz5RM44Tuhw/vo/F+QU0XbL3wLNYySiZpMnA4Ak0RWBYOq5jo+LT3dPFx//8v3HXW97LdTt/jeNPn2Q+rzJ3pII22s6tl74B/9QKhh+QrFFuozXdw3Vv7uXGOzZycmCIgw8N851PPczD33sGxRcYKksDolDQQxAySrS+ESOeRDfbqBYjKFMaR599/JweLU1nsriIYmtYSRuZVnnTB9cxUBwjrmTYeHUHqVaL7dfvYMtFm5mtLBLNCBKBZNcTp1CVKGqo0BBZC7bCrHCWPLZkybgR/JLKNVffxJEjE7zv0xsp21VK5SqFgoMIVezA4cknf0Rudg4/KLP3wF70mI6h+hw6fpBCIY8UYBo6U+PDNDU38tH/9EHe/ub3cPu1v86hx44zu6CwOBCQXOjlktU3UD5Sx8yTEfrErbQ3dHDNG7u5+c61HDl8imOPTvL9zz7Jc08fQwtVdEUSyioKCmoImkgRb2km09SCbrZjl6KICZ3Mz7gidDTmKx4UVVJ1HhUN3vpfNjBUnGF9zyaMhgrpDkmyrY6t2zYxHeSJJgPSqJTyBpo0ET7E9RZkWbCgekS8GJHGFHVWClEx6ezuY3K6xLv/Zx9hCKVyFacMdtml5JTZvedhquUynl/i+MnjVMtz2JV5FhbncF2Xil0lEY/w3L7dtDY28f73vJu3vfk93HzFmzj4+AnGJj2csybNwVZWZTaTeyHJxOMRusMb6O1axTVvXMmdb7mFQy+c4uSTc3z3s09z8oWzGKhoAoQMUdFREehqknRrK22dK9HNdgIZQZ80uap/y8sWA2r8fFzwwd1zfLANXti9j95Vrbz+xm2EnRFW928gavn4lkbMbCKrarSvbKAadYhELJRKFNOMUFzI41Z9fvsN/4m56iiO75EfWkSqCvN+CVWTOAtFVjZ3MXG2xHs/fRMuVRRP4Drh0oAiO2B+LseRoy/QvKKTQrWIEFWaGldw6PhRBgYGCANJ4CvMTiwwv1BmXf9m1q3dxn/9r5+hv/caoloHsxNlHn/yOSoVn007tvPtH/49JwZOsu+RMYoLKum6LN3rm7np165m/YZ1mNUWgpkIfXVXYU/4ODmF+lgGw4sQjafQUipqsp6wK0PnLeduceZnc5RzAUPHTrKqp5N3vO1KzN4MfWs3YJpVWns7yURb6GtvQ4sH6BkNQ9MpTLrouklhvohnB3zkvf+D6coYdhCQH1pEixrM+SWk8LFzRRpjdYR2nPd8+joC1SWsBiA1opE45YrH2aExRkbPkmloZrFSIGJ4JJJ1DI0MMzo6zNDgWerrWqgWXBAW11x9I431nfzRH/0NbY3bkE4do4PzPPfCaSoVn9aVnXz13r9h4OQwBx6bwC1HyTZkWbmxmRvuvJLW1lbMahv+tEV/47W4MwJRjVIXTWP6UVLpRrSUCrEMYXeKYnv5nB4nRsfZ99QRjj47SFNTA+9/31XIFQlW9fahagUuv+kaUvVd3HTd5WjREDMRwVJNBg9NoqoqxbyD74Q8+K39TJZGsIUkP1wknk0yOz+LHVTIWHH0UEdTsvz2J3citZBqxUPXIphGhHLJ4cTx08zl5kjW1TEyOU02pTEzU+DYwDHy+TwnTw2yafMlOG6IFUlz43W3UJ9t5yMf+QR1yfUU5gz27T/NiZOTOI6kZ+1K7vnxFzlyaJDnn5hg7OwMmfoMPRuauOX1V1NX10AiWEkwY9IdWYc3q2AGabJWBj2IEk3UL30ezQbsjhgH7AMvZxio8XNwwXfLlPM+f/UX36ShawUb+hrYeNUW5vVhBo8Nk/QjBHaZiYkAIxGwItZEzpohYoTYBR9VGoSah+cFUFVwsBgdGqcrmiIIIR6P8MUPfY53/vld3PaWuzi8/0GMIM07P3UNEw9O09e0gc999VEsI8LMzBgTsyq69hSJpM/zx31On3mCi7fvJJFKozBG+8puqlWHKCHDo2dZ2dPD4NQATrmEXy1hxU0sr8rlN13HnhMP0LulBd1QiUUTzJaH8CiSy+VRtTipRJKyt0gQ+OzZ9wx24KDMVRg7M0MYqOwOjiCExDAMBBCJnLuvOGrV8+mPfIW12zazcWWK7dds51juMIf2H6M32YWuBAxNVaiGFbpEK+WkTUyXVIMQJdTRjADX8QnGBLMFm+hMnjrVYKmrQ2HP13Zx2Zu2cflNr2bfvvtIRFv5zU9cych9o+zov4a//sKPMVWYmR5jdCokm2lkauYoj0rBocNHuXTHlWTq6mmqU2lp68Y+O4IIJMePHmFlTw+npwfQVIkSuOiWgeWVWbe1n1OT+1i3rRNN00hl0kzlT1HFZX5qGKFEyaTSlNwFPC9g17NPYgcOU+MLHH/hDKpisev+w0CIaZoooSSWjJ3T44r2Pj7+p//CHW+8gZZGlW1XbeHQ9HGeP3aQnb07sXSHhYmA3GKRplwKIQSmEtKQMhGBhqp5uG7Ik9/by+rLmkjnyjSYGRZx8E2H4HSFymqNK6+7jd0Hvo8aprjrLy9m6oFRmqJr+O79RyCsMDE+zOCoR+eKbg4cehojYvD8C0e49pqb8MMJGuvqSbe2cPr0GZxqmaOHD9HZ0cHIyFlSySiBs0A2ZaG5JcyGBAv+AGu3dCCBhqZGpopj2H6e+YlhFBElnU4zV5jAdX32HTmI53mMnp0GJKqqsev+w/i+SzwZQzoe9U2NL1MEqPHzcsEH92yqmbCoEvFC1u5cx8z8HPXNHeQJueSK7YwV9mNYBqu6+pidLlGpKrRaSar+GKgKaqAQKh6GahHVktgVh8b+PsYqgxRjAXe89yaiagPlUDB0tkBT7yAJI0XLDfU4sxpB4RBuZh1JPU0ko/HsUw/Rv/1SZmZ3UZ/O8szT92F7CpdcdA25xQUymQx19Umcwgx7943z6EOPM7UwQ3tXH83RCIHh870TnyGdaqTsjNOQbCNnjaNGBXFNJakk0CwNISoE4VkMK4Jt20RQGB+eYUVHFhFJorsWmq5QmCuwWC5Q8ZPn9GgYBpEwiSjbtK3fyqmzZ2hv7cSx4KLLLuLs3H4UTbBh1XompwrMF0vUN7cRM8tLHqUkUH38qkIynmH07CRrr7iW8eoQ5agkukoQMxvJVTyGTxVoX6OR0NK03dLIoUNDCPc0IrWSSJCkVBzkgfu+w/pLdvLMnmeImRH27H4QzYqzad2lFEqLZDIZkpko02MDPPLoCXY99Syzi/OsXL2BBsvCEVX2L3yHVFMLFX+KbLyRvHUCMyKIGSqByKCaAkWx8YJBdNPCdV0SKIycnaa9sw5pSiIyihCCxdlFFsslSrn4OT26QYkoScbPHGLjZa/j+OBxGuszkOmld2M3Y+XDVL1F1q5ew/xCBc00sMw0bQ0Coe1CDUMgwC2GJJNxDuw7xI1XXo10XBxV47mRx7lt01uYL9tMjzg0d9hovkXjVQ3s+8EpVDFDNdRpirZx/Mwz3PONr7Jm8xae3b+XiKrxzJM/JJFtZF3fdip2md7uLsZnQsZOn+CRR4/wzJO7WawWWd23iZZ0grnSJDOxfbQ0d5O3h8kkWphTj2BpCjFNkNH/t0c3OIOWMQkISfswP1shndWRZoyEGicMI8xO5bBDm8rUuadOrvGL54LvlnHCWYL5CJsubqd4dpbMqpCZ0iLbtl+EYZi0tHcyt1jCV1yk45FMxQjVgKaWDgAURUHTNNSo4OgTp6lUbPKVBeZmbHwnih1zKZenqeQWuP21b6Yh0oQmBbg+084+PvJXH6a9NYIRKRC4Ve569x8wPzdDOpIlmaqjvqWdgVMH+NY9f8/XvvY19uzZw5HnXuDMmTMcO3qSrpWraYxHuXbdamaMUXKZI/R2bAZHoym9mu7GjaSNTmJaEvwsm1deguJlUYM0ipdCl2kSZjPJWBfr12/EiEWIGTGsiIKv+GSaMvStW0NP/7kfYJWcGcrjgv4tzYiZIh0bEhQCh8uvuARdixJNpql4Ia5cWqezuaWBUA3INrUhhEBRFFRVRY0KwjkNQ4+TryywmAvwHYuGNQ2cHTxClJDbX/9WkpE0ge0hHQ+RGOIv/vZjGEqVbKNH/5qtvObNv8n01ARxNUFjUxuZhkaeP/gU3/jWF7n77rvZtWsXpw8f48SJE4yNTrGqr5/WdJLrN/UzF51gMXuC7RtvIKhAS3Y1W3qvI6m1E9OSuJUEG7q2ElFaUIMMJvXoMk3caCIR72bDhg0YsQhRPYquCwLFp66ljjUb1rBy3blbnFV7gYUzVdbesB4zX6FzbT1WOs5F29cjgwjFigdqFJcKjueSysYINA9PLo18FUIghEC1FLJ6G40N7WAIykUBis7qnX08+vD3ifkB1972OlTTYGZ8BsMw6Fzj8MnP/CG64rGq3+KyS1/FpdffyvhcnpgSJ1PfQjSd5Nm9D/OPX/0b7r77bv7pnm9x5shxDh8+zPTUPH39G2hJJbnxog0UMqOUGge4+uLXMjOZpzndx3VbXk9cacM0kgROmjVt69DDBtQgQ8JoxRRZEn4j0UgHPT1d6BGLqB5FUeXSdddex9pNvT/z81jjF88F33KfdQuUygUiLQb1KxsYm7DpWmXgKXkGzkyz/eLNGEzhlcvYqkosohLxTWzPxrZtLMvC8z1kIBkaOkO0W+Xs2SkMKXFtD9u2KEibpx7fx/Yrd+IVVLSsycDhw6xauYbCYo5snSCnJFksG4ycPcOaNVvxvCKaZnPq2EmKBZuKPoOr5Bh/4ACaTJGKJwhCuHjHJVh1Bk+M/ZhkJIpdUAjyHkpep7G+kYhiUslVCDQVxfM5fWyOMNBw/YCmdAd+SeK6Li5lhOIRmo2YukqlUkYIiGTiaEoM/WcMVyiEAboh0BsU9NURhsehrt4jL0eZHiqyes0GconTqKEHsQiaHhLzYhwbHMBxHEzTxPckSPjBvd9n600bOXs2jlMsYsYjLCyEdGXinBwaJdGQZeZsgda+Oo7tPciGdZvZs38Pb37rDfzwvucZn5qmeUUX/f3bqFbLbL2one98/bvLHqdxlHnGHti/5DESQ7Mi9PatRksJHjh5D8lojEpeMD8yQ53SQLOeZeT0aapelVBXUX0YGsgtdSkJjUS0Ed8N8TyPgiyhqD6OkiYTNSmXy2iaQiQdRxXR5Zb1v818KSTbFMdUTOxVEsPPMDN3Cr2zjD1tsrp1C+NGlXRsgULKQFVC4k6SH+96DM/zUBSFUAqQkr/71Ge54tU7GDxtsJjLs2JjD/k89G1Yy1SuiOo6zJwu0r1xBU8+uJtLdmzlnnv/hQ9/5Lf4xy89yujsMDf0dNG3eiOeY/PO37ye97zrQxQLSwtonB5/mjPTT2DKRtLROKpp0dHViRL3uPeFu9FjOpqtc+bQCdqjHayINPDoQw8QxBx0LYEaVhkbrOCFAl01UY0sYRjg+VXcgoOq+XgiixnRKVdKmKZONJFBDQ30sDa3zPnmgg/ug4fnaO/XsWcKBNvLxGI2Z46Ns2PrNohNoClJBob3sLHvNoK4RI9FSQnJwwcfQ1d0Qi9EVxUCGeIokpb6Vvbuf47L+9dRLZSIRxvxvEXK5WlK+Srxxk5u2tRLwgj48j9+g7vueheXXncRowPTjI0rtHWt4qv/8Fn+6uO/yWe/9BCHTxwDRZJMxLjq0u088dRTBMoCE0UH4Tncd/89oCiYQsELVVRVZ/TkSUp2nsPPnSEIAgJ8NE1DGBowjqpJFKmghicxzdTSqvSLYyBdVvav5NZXXUsQD5mbKaIWfdyChy3OfRP24Lf30LJaxZktEmytEEsG5Cdc6npbKRkTmMYWnt7/17z15lcjDRc9Fifmhew6tAtNaPiOj6YIQinRYjEaM03s3f8cN158CZVShfpUB4XSNPv27uOinTfTvXYrt2ztJ6b5fO6LX+XOO95GSrXoX5vEGlZo7VzJP3zu03zv+3/K+37/s5wePgOKJJ1KsHPHFp546imEtsBY0UHkqoxOnwGhoRPiizyGZjJ++gQlO4+pmYRhSIC/dIdhGUg5hlACNKGhh0fRzSRCCBbyo2gqbL5kI5fdsJ7FxRKFvIMouLgFB0UY5/T4rc89SMMqHbXsQswjF5xECyxEKMl5x+mybmLPD/6J269ci6L56FEdreAyujCOIhUIAxQkQlUwrRiZZIaBk4Ncc9E25ipVMvE2SuUpdj/xDJt2XEv/pku4dlMfaQ2+9b0H6OncCEaV9evjJMYbaFrRxXc+/Rd8/Wsf4mN//LfM5GaRIiSTTnPJto08/uQuQivHyKKN8Jc8CkVHkwGhohGPpRkZGKLiFjBUAyklAUtjLnRdJwz/t0dD+mhGAonNYmEe01DZfMlGrrz5ciYmprErkrAg8Ys2qlKbW+Z8c8EH96d+vBc9HaEx28zRg4N09KdpztZjoqKoDr6/SG//ZszGFEauRFmZwPF7eP7gc2R7VjA7MI9p6XiqR0TziIQZ+vr6CA2Lsi1QLR/dihGLG9SHCoXA45GjewjVOt5wx/V4ocuT+5/mhu23Mr94mv27nuEtb347H//kjxkfPYlpRQkJqNo+Dzy0G02zkAKkUwUhCAOPUKo4gb3U+nWhXJgDJLYqEEKlri2NaYVoCR1Fl1TLJaKxFAN7x7EiLuk6lf7L+ji+b5izZ0t88e9+iKJZGCZEIhEUTcUtzJ/T49zIIlrKor2lk2cff561F7eSMhpQ/BBN95BBiUxzB34qhlVUWAxHiVVW4HkOifYo86dyRKImVenilRZI6PX09RnkylVsxSBjekQSdUQtlYtaOzk6PcU3Hr+PaCLGr736WpSIyrd/8C/8zpvfx4lP/TPPPbubO1/3Rn7rnZ/j5MB+ghAsy6JUdnjokT2oaoRACqRfRigKnusShj6OdDEMg4pdpuj7qKqCLQuAQlNnFqG4RLIGqAHVShHDsBh8fgLDtKlvMth67SYO7Rrk8OFJjh2+F1WPvOhRCijOTpzTo7B11Iwg1ZbiuScOsXp7I9FQJfQlZgwUfDy3wKJuMDU0ipo0kbk4bSuasBqGKU8uHb8tHcrz82SjTRSMKoPDYxidaWKGSjTdSNTSuH79FnYNnOAL936dliaFqy/dhG808ejex3nb63+Dj3z00xx7IcPnPv2f+Z33fomh0/vQdBPTNFksVHnksX1IoVN1VAgqIASet+RR4CEUFadSJgwCVFWhKgOEUGnprqfslsg0RUFVsKslFDSGj8yhGUV6N9TTuXkdJ/aOcvjwJIee/waaYWFaKqquoes68+NnX7YYUOPn44Lvc/ccnVBRMTo0ervXo7oJigsFfCNEjUQphhUaOzLYizp7Hj5OVLYwcnwULUhyx7U38JpbduJ5Lhe/aiVH907x4d/9c6YmRlAjJnahRBAE+OUK29ZtZO/Bg5QXyuSLcYaOn4VUkkp+ntveejuPPfMI179+B6bhk5sa4aYbbqFtRReaKpFSIIMQ33epVh08z1tabd53CRXQdYHnstTtUKqiqDogqJQD3LKPV5RgG4iIRhh6tKzMEqmz2HJLL1fetpGZ2TIH9w0RazbZeW0HueokhcVJpsYH8eUc229vIdN/7geqs7NFQqEgW0K2brkELUhgF0oolgGGSTmosnJtK86ixne/+jBJpZXZ0QUy0RbeePNtvPbWywkClx23r2LwaIF/+PTdTE2MICIGdrlCEEiKc7Pccu21fOcHP6C0WEIzu1iYKBJpayI/O801d97AP33ja9zylp34fo6wUuCG626go6OXiKURhiCDEM9zqFRsHMehWrbxfRehKaiqxHUkxYJNuVgBoeI4LuWSj1MJsPMhzmKINBUkLk1dGRINUbbc0suOG9YwMVVk3zMDRJs0rrt1Nbnq9Ise61t0Lr2jndTqcz9QnVzIIUIN2RSwYctWMrEm1CBYultQNEp+ida1zbgVg+efPklCaSVmZGmIt/G2176Om265Agi45LaVzIxK9j55GE0JUZNRynlwvZD5sTGuuOQivvz1uymXqjS2bsL1G9CbMowPD5LorOPjn/oUV75qMwdfeJSHHx/lVTffRPuKlZiG8lMe3YqN67rYFQffd1F0FSkD7GqAU12qF4pGteJSKnpUSx6VBR+qEQJLQdV86ruypFoSbL21hy2XdTN0dJYDu0+hZn0uvbqDvD3LYn6KqfEzdKw1uOQ1bVgd587eqvGL54IP7jP5Ak3lJKGqg1KiIZ5CI4amwNSUw2L+LJlEiqAMbtmloZykPtECMZ37n3yIHz+7m2tft41DDw0TVFX6VjUTz2SYmJmmgSKlhTyOFiVMZ1EiFRYnzuBQJdXQSKUksecrHDs5gJ9ymZWSN773UiZKwxzavw9Dj2EYaVzbwXN8Ag8Cz0dKD1UJUXQVoYHjeEi5PEJSqLi2h6qqWDGNtvZO7KrPts2bmdxTpDjroSgG2YxJOhVjrrzAzlf3Mzs7y+yZErueGqSts4lXvWEnyZY0M3mX+z7/FM/cd+5laqteSJ/RTtn1UXWHlJ7CVFMIxcetmhRKwzRkGvDLkt6eldTlMnS2diESgh88/GMeee4AV792G0cen0A6KjFLEM9kWCwW6TRcnGIZLdXEydlZXJnD8ipUwzKhplLIO8Rci5mFeab8ccJsmhvf0s+R0RdYnJ0hkaxDUWLYleqLHgkl4KNqEqEpoMqf8qgoGp7jY5omVkyjo6MLu+qzY9t2xp/NE1ZNgkDS0BgnmYhQ9MvsvH0tudwsc0MFHn/kBCu6Grnr3a8m1pjgyIlRvv/ZJ3j+8YFzelQ0gw0NqylPx1B1m8J4lUyqA03xMPUs5eoYre2NUBFs2ryBxESCbF2SMGZz7/0/YO/J59lyTS8nds/hV30GTx3GF4Kq41CfWCSoVIk2tzNRrpArTpBWA+zQZXZmgcV8hdX1q8g0NZBT52jfuoE3v+9aDp/cReA4dHavJgxNquXKix6llAix9LxF0VVQ5dLc9SgvvlzbIxI1icR1urp6qFYCeuvqmXx6AS1IIf2AlpYE8WiMICLYdstGKuU8hXGb3U+foa2znnd+4A0YmQgvPDvEj/9+F2deGHlZrv8aPz8XfHDXgyhNHSnGT7pMnJgiP15HU3w9GWMlQyPjnBkbxy/5zDmLdPavJiwXUeIeVixL/5pVKMLhke89j10uETgBdStSGFoK1ylwcthGhnECX0fogsVqmX1Hn8OfKaMYKaRTh9YcxzQMWjobaIg38cf/5W+48s2reP17L8PxfKK6iqJAIH2CICAMQxQRoihLahUFnKoNoURBEIYhYRjiVHxSsTSz+TEEHs/s3ovjlymOuzz/4HE838A3AtpW1xMYKre+9Ure+AfX4lUD8nMVDhwYonVFhmtu2kwiDZ19Hef2KGOYKQV7NsnU8Rkqcw10Ne0gq3cxM19iaGKSsBIy5y/SsqqLmOYjozb5ks2avm4Cv8Qj3zuIUyoRuiHxhiSGlmJifJDRWQXPtXBsQXNHG6Gh8q17vo7MOUSTTQSlOrQmDc/x6NvQTV0swyf+55d54+/vZNVlURzHwVIVNE0BRS7dTfk+igjRdR1NVxBC4toOQoIqlBddV0suyXiSmdwoSJdILIkfOowdmeP4M6OUbUFghrSsyuLrCje84QrueO9VOGWP/FyJhx9+jo6uBm573Q7iSZXutd3n9Cg8E2k6xKN15AYKVPNZOuq3k9LaqDo6Y9OzRDyLnCzS1N5GKhbiKgXGZxfo6W6jPF1k76ODBLaLiiCeTmFoMU6dPIw3ouA6OnZFUNfaSDyb5fOf+xu0cki6pxe/UI8dKZGbz7Fx2xoSqsYXv/At7vjgZsy2OeZmp4nqS6t2KZpYckSAIFjKGNOWMnV810Nhaf9PPFaKDvFonOmFEZAOXeu7QJcM7BnmxK4R5ooeMhrS0JUk1Cpc+dpLufyN/RRzZRYXitx//x5W9q7gzrffSDSu0rFq5csUAWr8vFz4wZ2AxMo4tm2zqn0VpqoRqatHCZvQdIWrtv4G9akEYyfGSTZEyDVpmM316FGN3bt3A0vpkOl0GkUNidYL8nNnKBd9ND3AdSqIIMQpe2xeu45ULEtnQmN0+AilXJGC7fDkk0/S3t3PCwfvJ6ZbfP6vv4YXzvKO370Y319em3M5xe0ns2xqmobv//QUvFLKF1MzhRBUKhVCV2D7FRbLeRLxLKVSCRSDff98nN3fHsCvCFa0ZNB0GDk7y8bXdON6FSbGZ5mbcTg7OEPbyg284ZYbzunRlAH1a7PMLcySTWbJJlMUg4CI3s34xCjb+l5PwjJ45ke7sJIqEykfN2JgJi0OHDiAoigoikImk0HikmrUyM+dwdAS5PJTOG4JJZRUCg51sSSNTSvoSZmMnj1IfmaORS/g6WeeYkVPP6XSMKVciU/82RdIZSW3v3ENqioRYunL7197DILgxf+jlEvBX1VVNG1pqT7btvFtiRNUGR4fJGKlqVarhB688K1TvPCjYXB1utob0A3B1MQim17dieu6jI1OMjfjcOTgCB1r+nnL7Ted02NEhtStyjJ0dBdCSNqbW8lVbJKRPo4cOURv6/UEeHz/q/ehRCQjCYeypqAYcPz48aX1cMXSFMRCDUg1q4ighKknOXLyKGW3jBJKpC+wCyVaOnvoyRjkpk5g5/OUQsnY+CjN3WvQrSqFuQpf/sLXWdEZZ822OPUNyReD9k8+cz/xKKV80W8Yhi/+T1V16eGp53nLHisYpkBVlr54A0fh+L+McObpGUwRZ0V7M4YpCDyLS97QR+ALRobHmZspc3DPIM29PfzGnbf/X17pNV5uLvjg3pJtQNdiyOI4w3NjdKxoIZkJETJKc2sSv6yilDZx9MwwqhWiOZJiOEOgQG7SxXc1NF2gRlQuuq0bu+gzOjJLsVLCzJjkxufwypJQNZFaQFX3eWbPQeLJBohEkDJCg9pEEASsWdnO+z78LkQ5xZe/+T2cRMAffeUDSxdQKIEQRQGhmAhVYCgqS7MZawghkTJ48b2KClJ4eGGAocbpblpNqTqHoi9dkHrUIkWSZ79ykMnTDtGIIFUXQYQ6l965k/d/9NdxKlWmx+fJz07x7MDxc3rsblvB5MQ85bGTLAY56urj1DUqaCQpLM4g3ChycT0iGkcqNpoHC/44XhCQn/JwbQVdV/AVl003t1OYKTE6MkuukCe7IkNhIk9QFYRCpWtlK44WcmxgmES6hVi2gSDQ2dyxlSAIqM7P8Wf/82O4+Sjfvv9h6tZk+dO7P7T0BSl50aOiWkgh0aRg6UZIA8JllxIhQRKA4uPLEFNL0F63iqqz5DGQoJkGeknnmS89R24CElFBPG0gsNhx5w7+w398K5ViienxWQpzszx95Og5PfZ197B71wHK+SRmVqWhJY6VqmAqdRx54XmiopGo20ZTbxehYmMEGhPFIUIRUpiRBD6YpkbVLbLh+lbmR/OcHBjGDXyaVrcg5z0CV+AGgiuv3k6owdjkPKhZjFQ9rquSVZoIXIdDz+znnb/7dkrD8NC+vVz26ot4ywfvwDAMFAQQQqgjFJNABqghqKp80eNPAr0qFELpv+gxZmQJqwGOt7DkEYlqmFTGXB77/B68kk46oRFJqMjQ4KLXbubX7rqR0mKJsfEZqvlFHjv4/P/7xV/j/4kLPrhvXLeax3Y/yC23vp10fQpNlczkx1AVi46eFTw98gMCdZZ0W4yk4mGaURbmT2JFErSvq8crBHSvzeIFeXzVI5VJIBWThuYswjMRqk3Rr+LZAcmGZiw1wnTJQQ3KuEoBQ42z4+J+zpw5w/ceeJQJb4ZtO9ZRGAz45te+je0V+eO734/QQoRQEWJ5mTZNBVPBWm55BhKkUDA0nWQqipRLi1x3t6wgHU8Qb9QJXdCFiuqpaBqYRoJQ1dh3/3M8/dURUrKBxkwMEbocOXmQi29YBUqO/EKO2Ympc3psasuw6Mxx+x3vIBpLETE1CpUpFBKs2rCKxwfvJZ620VOSuAioOiEjp49gmEliTSpu0WfVpizlSg5hSfSovuyxnsVZG83wqEgXGarISBRDjfD8iUEMKgR6GU1EqUsZDI8Msfvwcab8WcyIz8SRAt/5xneoeDn+5Kt/AIp80WMYQqgIMLWf8hgiMDSdeMJCAqZmsrKtg5hpIWIuItQwVR3V09ANgWnGCFWNJ77zDE98ZYR6s5X6VARFegycPc7Wa7qo2OPMz84xN3XuRcOMpCDVGOPqG9dRKgUYhkbJnkaESeo7Mzww8A0a4iZK1CMR+Nh+yJH9B5dmA7Vcynno3VyH65VR4gq+9AiFSTRhUV60EbqDG/ggFRY9G0OP88ATz5IWLqFRQWCiBRWKpUUWwpB4Z4yzR8cYfS7Hww8+QqxO40//8UOEBCiKhqKw9D9RFaShYmk6Ukr8cOnLUVc1rIiGUBQMXae3vQtdVQkVHU3RsXRt2aPEsqKEquC+Lz/M418aojHWRjZhosmQXGWejVeswC6OMzs5zczUubOOavziueCDe54yV2y5jinvEAf3HcANCpRzcwSihBWzSMZb2bP3xyTrNarCYWJhjh/fdxwtotC7vYdt1/cx8MI8odSpuCX2PX2ItBUlkIJHn3qUpvoVyEqVWDSB0A0cb55Zf5qxo0N4FZNSqUSg6+iqgZ6OcGbsEJOFOYreFMU5g+9+/z4y2Rj/+esfxLQEoQZC8dEQyMAnCEJUoaApKpqikkrHsD2XeDyOGY1QtCtMzkzTuaIdVUiUqEVCNXCrBRbLOQD8sMiGK3oZmx5nfCKPKaIYehRNTdC/egOX33IdO1+185we1bRFnHrmxFEGDh+m6hUoLy4QUKS1rZmoVc/Dj3ybxhVRHNWlGjh85+vPITWP5rWNXHTNao7vn8MLQSqSA7tOkrai6KbJ0ZNHSSUbCEplAl8QiccJZIGSUWDmzBSlskKpVGFsdhZdNSApGBo/TMvqbhSzytw4fP/73yWe1vjw3R/AtARSFwjFRRdLueG+J1GFgq5q6KpGMhXFCwMSsSR6zCRXKjA7n2Po9Blk4CEiJhnTolwukCvmEULgBgW2Xd/P2ZFhZmbLGEQwDAtdS3Lx1ku5/JbrufTWS8/psb67kfJMyKI2R3F+jqpdwCkW8SjQ3bUSw0jy8LMP0tVTTxgBH8n3vn0YLywTbYY1F7Vz8Jkp7EAiUBk4NIkpBKlsioVShSBUcCpVPFeQyTYiVBtbKzE9MUqxrGDbHp6qYlfKFMhxbGg/V712O3rUZehYgb37nkGNB/zZPR/DMCFUfFCqKAiE9HEdHyHB0HQURSGesBCauuQxYjGTm2duYYEf/eBHuK5NYJpkrQilSoGFwiKqolPF4eKbN3J6cIjpmRJKoKLrJoae4vIbb+DKV93EZbec22ONXzwXfJ67GleYGszhR8ukY2kUKRldnGSTzCMDC7fqIhWD/vV9KIZLwkjzwv5hbrmtByPt8/TXjhLaDp4S0pxMMlSax/cEqjTY2L+JtBLl1PQwVipCzJdcdtlGHnpoiFi6mSfu/QFX3HkjJa/Cj775GNtv6WGxoNLensKMrGJyNMfIQJ7HH36Qq6+/kT/4/Lv41O99BdvxSCaWWpt21cO0VFxbkMkmcZ0AVSy1qCqlKp4tkabCkROH2HL7RoThko4kcZQAXRVEEnF0Q8UObGJGCk3qpOrSLM4vEOoereub8WWI/zNW1JopzdJpNTMyNU1TtgkChxl7AYc8yXgTswWFWF0dzb3NKKZC3EhQLOikrDSrOwUPf+04ge0Qa9GpMyKY0sL3BCJUSMWypIlwYuI0etxAUSXr+zt5dt8M0UQjQweO0rZtNT39Pdz9Dz9i8/UdlKoqdWkDVWSYGi8xNexz+Pl9rNu4nT/88u/y5+/8HJ70iUY1FFWlVKyiapLQF8RiEXxvaSi/qgrsso1flgRGyGxums2v2oAeDcnG0pRDB8PUiESjqJrAwyOp1hHVY8QzMWbGpzGNOKnVTQSq+jM9Hjh6kOs37uTsyDCZWBpNBORkCVvmWbmyj/kpDTVhkG1M4+kG9Yk4zY09xLUMG/t6OfDgMEqgsPqiVhJCYCkRDNUEX2Xw7DA7ejdxbPwkZnQ1qiapzxgUywpBzMKcKeM16vSsaef++/bQtCFKGAoSdVFSGYPTA3OcOrpIS+tRmlr7+Njdv8cfveMvCYRKIm4gUSgsVpcSAHywIjpIjSC00XV9yWMlxNd9OjavxchaWAlJOrLk0YoYWJEIuqHgSo9G0UBUj5HKJhgbHiMRz2KkU3j4yODcC7bX+MVzwbfc58am6VrXxNjgDKZpEGgei3moOEXm5hZQgioiJvDRCEIL1TBJJHswYw2MzE6hxFWsWJQrbruYg3vHkIqK61SYGBkmkrA4OXcWqauUPIcwgEpOoEUVDh57jsuu2oFpOwRWHM8vU59uQ3iS5t5e6lrbWbt5FUroMXwox67Hn6C1LsEffOYdeFqR0JFUKy5OqYLrSWINIUQqoHuosRA1LsimIqiKT2tdlvx4wMknT7Dv3n08+u1HuGTNZurrMmAspVTGInFQBaFhsLCYx9MEzQ3NKBEFVVFY0XbuPHfTV0mtiDB+ZhorYuJqHgsLLrZfxPGrqKGHK1x8aRFIA6kIUpkeoqk6AkOgxHXMRILLbriEpx4ZIBQKrlNhcnyCru42xipTJNIpXCHxPWisayc0An784HdBV9A9h4owKBZzxI0M0gno37GDTHsn19xyGaHjcGz3BKeOPk9dwuTDX3gXoeoT2CF21cMp2QjFINEEatxZer4SBTUW0JBOIBSfuAL5CTjzzABPf2M3D33jQXauv4i6dIpQ81ENDcuILA18AuYW8sioQUNTE3rcJKIbP9Pjmo5eSHjMj89hWAae7jI5MQeqgxYRWJqOq0dQtDSB1ChViihGA4m6OlJNWax0DCsZp3vNSp5+5DQBEqmEzM/OsWXrRibcadpa26kGHq4TsuOiS3GFx4EHD7Fn/z5Ur8Jsxcf2XNTQIqgE9HR3oCTr+fXfuhO7UOL5p4ax86MkTMFHv/A+FFPBLTs4totbdjAjFulWFaFLFDPASAjUWEBTNgWqTxTJqQNjDO0+xWN37+axex7h8k3bachm8JWlwK2rxvLAJ8n0/AJqMkqmro6IZZC0orS1RV62GFDj5+OCb7l31HXi6A6RpEHSTBCxUuixGAgNEarMDM+QaLbAk+hSwfOmiccEuhahWKhiiID2HV0YFKjmQnRVxa86iISHUFy8qkLS0lAciY6JFUmStARG8wqEYTM+OU/Cs+nf0odTVokkGpkrLBDkXbK9TXzw9pv4yz/7PENH53C1R7hs4/X897//Mw48sY8n7n2Kal5iWD6xVBKkhlRtTKKYwiQZT7MwneNVN9zA5/7hGwSBh25ECHzBxz/y9yAl0bhFEDpoVoTX33kVZrtPIXBY1bqJRebRbB98n6mR0jk9NkQbKYUFGlqzSFcQM9JokQSqFkH6ktNHT9DSYyID0FyBmljAjPhYRgLPlUi3Qt+lazBlEfylGSn9qkPV9ik5AqUQIRaJEPNBDzXmcxUaEiaJDZtp78owMzlLKghYv7kPVSaIpy1GpsawPElV+Lzng3fxyY9/iUP7FGzVZXXrZv7sCx/jB9/6IaefPUkpsLAiCpFEFBmq4HskSOC6IZFEPaE/S3d3O0dPjhPkPKxIEs+G//YfP4siBNG4hR/YaFaEu95+M3q7wfjCIqs7NrEoc2hOgOvaTA6fezbDBHHmqvO0NXcivBBdiRFJNSFDAyEle3c9Qf/OniWPvoYRL2JFQxJmHZFIjuqix6ZrOzHcEiI00TQF4boUirMktHrmyy7lqEuqrRlD6oyMjVMXM0msW0t9m4FbgWJQZFVPOwmrDiPmsGjbJFQ4Onyad77vzXzuM19n95ODbLomIJ1q4Y8+/UE+/Zd/SzDtUQwjmAkVI2pgWhUCLyQuE5SKNno0jetP4ZZshCFYXAyJxlJUCiF/8oG/Rdc0rKhBOQgxNcn73v9GKgmH8fkFejs3UVYKyKpHtVJhaqTWcj/fXPAt92LBYHpmhGLBYXY2h6ZDtVRmdOwsZU9yeuo00ogs9dMaYEUlhh7HFxLfd5cGvWglDjwxTuAv5UtXyxD6ERTiHDt6gqQ0WFiYIyyHoCvEEgLbdihXlobE4wl61q1BhgpRw8S3HQ7tPYombZ5+4QjvuOtNVB2bwpjF2bkB6qyAdZdt4C2/8WuksgrRmI5b9sjP5vErCtWKhyYFJ4cGEELyj1/6NkHgIaXEcZwXU9YAPBd8DxQPfvDtp/nhZ57muS8d51O/9z/IKjESgUEunyeaPPdcHorMkstPMTE+RxAIND0kcD1mZ6eZK1SYry5ANEk0poEp0I2QqJFAMw3y+QV0zSBQCjzxw9OI5W6lahkCz8LU6jg9PE5GMcnlFhG+hhmLIdQqdtVjobiUlhc6klX9awgDBV0ouJUq+586TESH3YeO8eY33EGxWGJ8wKeiLBA3XG5+4628/bfeTKZewzAFbtkjN5PDLYXYtk/KjHJyaAAVh9MnJwkCbylvu1IhCAKEED/lUfUF3/zKg3zrz3/E4btP8okP/HfSREhJC9txiGfO7VGQplhaYHRkkky6AU0XKH6I41QpOiFSD1CVKOlMBMWQBKGNoZpY8Rizc9PE61Q8scgj3z0GqKiqimMrhIGJr0SZzpdJqSalfAnhK1ixNEKtMrU4zVzRZ3Z2lsCVJOuz2LYLXkA59Nn9yEGScYvnTp3mpuuuJpfLM3AgT1StEjFd3vuR3+PmV19Ptl5HUUK8ik9xVsUpBvg+ZBMpTg4N0Jo1iVopgmAps6tUWhrFrSgKYRjiewLF87CEwZc/813u/YuHOf7Pg3z+o58k7utkrRihIn6mxxq/eIT8GX2Mv5SDWMoXrFGjRo0a/3cckFJu+z/tuOBb7jVq1KhR4/+eWnCvUaNGjVcgteBeo0aNGq9AasG9Ro0aNV6B1IJ7jRo1arwCqQX3GjVq1HgFcqEMYioB514loUY9MHe+D+ICp+boZ1NzdG7+vfnp/Ld2XCjBfeDfytWssYQQYn/N0bmpOfrZ1Bydm1eSn1q3TI0aNWq8AqkF9xo1atR4BXKhBPcvnO8D+HdAzdHPpuboZ1NzdG5eMX4uiLllatSoUaPGy8uF0nKvUaNGjRovI7XgXqNGjRqvQM57cBdC3CyEGBBCnBZCfPh8H88vEyHEl4QQM0KIIy+pywohHhJCnFr+mVmuF0KITy97OiSE2PqS37lr+f2nhBB3nY9z+UUghGgXQjwmhDgmhDgqhHj/cn3N0TJCCEsIsVcIcXDZ0X9dru8WQuxZdvFNIYSxXG8ub59e3t/1kr/1h8v1A0KIm87TKf1CEEKoQojnhRD3LW+/8v1IKc/bC1CBM0APYAAHgf7zeUy/5PO/EtgKHHlJ3ceBDy+XPwz8xXL5VuDHgAB2AHuW67PA4PLPzHI5c77P7WXy0wJsXS4ngJNAf83RTzkSQHy5rAN7ls/9W8Cblus/B7x7ufwfgM8tl98EfHO53L98/ZlA9/J1qZ7v83sZPf0+8HXgvuXtV7yf891yvxg4LaUclFK6wDeA15znY/qlIaV8Elj4V9WvAb6yXP4K8NqX1H9VLvEskBZCtAA3AQ9JKReklDngIeDmX/jB/xKQUk5KKZ9bLheB40AbNUcvsnyuP1ljUV9+SeBa4DvL9f/a0U/cfQe4TiwtV/Ua4BtSSkdKOQScZun6/HePEGIFcBvw98vbgl8BP+c7uLcBoy/ZHluu+1WmSUo5uVyeApqWy/+Wq18Jh8u3x1tYapnWHL2E5S6HF4AZlr64zgB5KaW//JaXnu+LLpb3LwJ1vLId/TXwISBc3q7jV8DP+Q7uNc6BXLof/JXPVRVCxIF7gA9IKQsv3VdzBFLKQEq5GVjBUmtyzfk9ogsHIcSrgBkp5YHzfSy/bM53cB8H2l+yvWK57leZ6eWuBJZ/zizX/1uuXtEOhRA6S4H9a1LKf1murjn6PyClzAOPAZey1CX1k7mjXnq+L7pY3p8C5nnlOtoJvFoIcZalbt9rgU/xK+DnfAf3fUDv8pNrg6UHGN8/z8d0vvk+8JNsjruA772k/u3LGSE7gMXlrokHgBuFEJnlrJEbl+v+3bPc1/kPwHEp5SdesqvmaBkhRIMQIr1cjgA3sPRs4jHg9ctv+9eOfuLu9cCjy3c/3wfetJwt0g30Ant/KSfxC0RK+YdSyhVSyi6W4sujUsq38qvg53w/0WUpw+EkS/2EHz3fx/NLPvd/BiYBj6U+vN9kqX/vEeAU8DCQXX6vAD6z7OkwsO0lf+cdLD3gOQ38xvk+r5fRz+UsdbkcAl5Yft1ac/RTjjYCzy87OgL8l+X6HpaCz2ng24C5XG8tb59e3t/zkr/10WV3A8At5/vcfgGuruZ/Z8u84v3Uph+oUaNGjVcg57tbpkaNGjVq/AKoBfcaNWrUeAVSC+41atSo8QqkFtxr1KhR4xVILbjXqFGjxiuQWnCvUaNGjVcgteBeo0aNGq9A/hdBhOQ0JUuYnQAAAABJRU5ErkJggg==\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "show(make_grid(imglist, padding=100))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n 2021-03-24T23:32:49.291422\n image/svg+xml\n \n \n Matplotlib v3.3.4, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "show(make_grid(imglist, padding=100, normalize=True))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n 2021-03-24T23:32:50.133283\n image/svg+xml\n \n \n Matplotlib v3.3.4, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "show(make_grid(imglist, padding=100, normalize=True, value_range=(0, 1)))" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n 2021-03-24T23:32:51.060394\n image/svg+xml\n \n \n Matplotlib v3.3.4, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "show(make_grid(imglist, padding=100, normalize=True, value_range=(0, 0.5)))" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n 2021-03-24T23:32:51.844460\n image/svg+xml\n \n \n Matplotlib v3.3.4, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "show(make_grid(imglist, padding=100, normalize=True, scale_each=True))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n 2021-03-24T23:32:52.624197\n image/svg+xml\n \n \n Matplotlib v3.3.4, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "show(make_grid(imglist, padding=100, normalize=True, value_range=(0, 0.5), scale_each=True))" + ] + }, + { + "source": [ + "## Visualize Bounding Boxes" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "You can use `torchvision.utils.draw_bounding_boxes` to draw boxes on image.\n", + "\n", + "You can set the colors, labels, width as well as font and font size !\n", + "\n", + "Note that this util requires a single image of dtype `uint8`.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "from torchvision.utils import draw_bounding_boxes" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "torch.Size([3, 768, 1024])\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n 2021-03-24T23:32:53.654506\n image/svg+xml\n \n \n Matplotlib v3.3.4, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "lena = scipy.misc.face()\n", + "img = transforms.ToTensor()(lena)\n", + "img = transforms.ConvertImageDtype(dtype=torch.uint8) (img)\n", + "\n", + "print(img.size())\n", + "\n", + "show(img)" + ] + }, + { + "source": [ + "We will draw a few boxes on lena!\n", + "\n", + "Note that the boxes are in `(xmin, ymin, xmax, ymax)` format\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n 2021-03-24T23:32:54.157276\n image/svg+xml\n \n \n Matplotlib v3.3.4, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "boxes = torch.tensor([[100, 400, 500, 740], [500, 200, 800, 580]], dtype=torch.float)\n", + "labels = [\"grass\", \"lena\"]\n", + "colors = [\"blue\", \"yellow\"]\n", + "result = draw_bounding_boxes(img, boxes, labels=labels, colors=colors, width=10)\n", + "show(result)" + ] + }, + { + "source": [ + "You can also `fill` the box with the color.\n", + "\n", + "Note that after filling with color, one needs to save the resultant tensor in PNG i.e. 4 channel color format.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n 2021-03-24T23:32:54.542848\n image/svg+xml\n \n \n Matplotlib v3.3.4, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "result = draw_bounding_boxes(img, boxes, labels=labels, colors=colors, width=10, fill=True)\n", + "show(result)" + ] + }, + { + "source": [ + "You can also plot bounding boxes produced from torchvision detection models.\n", + "\n", + "Here is demo with torchvision's FasterRCNN. You can also try using RetinaNet" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "from torchvision.models.detection import fasterrcnn_resnet50_fpn\n", + "\n", + "model = fasterrcnn_resnet50_fpn(pretrained=True)\n", + "model = model.eval()" + ] + }, + { + "source": [ + "Let's load an image and get predictions from model." + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n 2021-03-26T09:33:29.242197\n image/svg+xml\n \n \n Matplotlib v3.3.4, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "lena = scipy.misc.face()\n", + "img = transforms.ToTensor()(lena)\n", + "show(img)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[{'boxes': tensor([[ 67.7731, 21.4386, 953.7158, 699.8793],\n [ 202.9559, 4.7902, 940.4207, 679.3505],\n [ 29.5735, 21.2866, 376.5114, 424.0385],\n [ 0.0000, 301.0412, 1024.0000, 768.0000],\n [ 52.2440, 281.1678, 784.5737, 733.5809],\n [ 57.0902, 18.2170, 954.9303, 709.1071],\n [ 27.6776, 359.6552, 814.2780, 753.4029],\n [ 78.1657, 32.2182, 938.7345, 703.4693],\n [ 50.6699, 31.5133, 918.5210, 722.1469],\n [ 0.0000, 260.4532, 729.0366, 768.0000],\n [ 480.9375, 512.6833, 784.6242, 616.1514],\n [ 0.0000, 268.2257, 953.8960, 768.0000],\n [ 100.8516, 354.4102, 766.3854, 718.2952]], grad_fn=), 'labels': tensor([17, 18, 20, 15, 16, 23, 51, 16, 20, 64, 16, 62, 20]), 'scores': tensor([0.3728, 0.3323, 0.3065, 0.2696, 0.2288, 0.2064, 0.1333, 0.1174, 0.1026,\n 0.0963, 0.0725, 0.0574, 0.0549], grad_fn=)}]\n" + ] + } + ], + "source": [ + "# Get predictions from model\n", + "outputs = model(img.unsqueeze(0))\n", + "print(outputs)" + ] + }, + { + "source": [ + "Let's plot top 5 boxes detected by our model" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n 2021-03-26T09:34:59.912114\n image/svg+xml\n \n \n Matplotlib v3.3.4, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "boxes = outputs[0]['boxes']\n", + "colors = [\"blue\", \"red\", \"green\", \"yellow\", \"orange\"]\n", + "\n", + "# We need a uint8 image for plotting!\n", + "img = transforms.ConvertImageDtype(dtype=torch.uint8) (img)\n", + "\n", + "result = draw_bounding_boxes(img, boxes=boxes[:5], colors=colors, width=10, fill=False)\n", + "show(result)" + ] + }, + { + "source": [ + "## Visualize Segmenation Masks" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "source": [ + "You can use `torchvision.utils.draw_segmentation_masks` to draw masks on image.\n", + "\n", + "You can set the colors as well as transparency of masks drawn.\n", + "\n", + "Note that this util requires a single RGB image of dtype `uint8`.\n" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "from torchvision.utils import draw_segmentation_masks\n", + "from PIL import Image\n", + "import requests" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [], + "source": [ + "url = \"http://images.cocodataset.org/val2017/000000281759.jpg\"\n", + "img = Image.open(requests.get(url, stream=True).raw)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "torch.Size([3, 427, 640])\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n 2021-03-26T10:46:04.209868\n image/svg+xml\n \n \n Matplotlib v3.3.4, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "# lena = scipy.misc.face()\n", + "img = transforms.ToTensor()(img)\n", + "\n", + "print(img.size())\n", + "show(img)" + ] + }, + { + "source": [ + "We will draw a few maks on lena!\n", + "\n", + "Note that the masks contain tensors denoting probabilites of each class.\n", + "\n", + "Here is demo with torchvision's FCN Resnet-50. You can also try using DeepLabv3 or lraspp mobilenet models." + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "from torchvision.models.segmentation import fcn_resnet50\n", + "\n", + "model = fcn_resnet50(pretrained=True)\n", + "model = model.eval()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "output = model(img.unsqueeze(0))\n", + "masks = output['out'].squeeze(0)" + ] + }, + { + "source": [ + "Note that this utility too needs uint8 dtype image.\n", + "\n", + "You can vary alpha to get more transparent or filled masks." + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "img = transforms.ConvertImageDtype(dtype=torch.uint8) (img)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n 2021-03-26T10:46:11.418103\n image/svg+xml\n \n \n Matplotlib v3.3.4, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "result = draw_segmentation_masks(img, masks, alpha=0.2)\n", + "show(result)" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n 2021-03-26T10:46:11.879624\n image/svg+xml\n \n \n Matplotlib v3.3.4, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "result = draw_segmentation_masks(img, masks, alpha=0.4)\n", + "show(result)" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n 2021-03-26T10:46:12.511543\n image/svg+xml\n \n \n Matplotlib v3.3.4, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "result = draw_segmentation_masks(img, masks, alpha=0.6)\n", + "show(result)" + ] + } + ] +} \ No newline at end of file diff --git a/hubconf.py b/hubconf.py index f43f922e89c..097759bdd89 100644 --- a/hubconf.py +++ b/hubconf.py @@ -1,6 +1,7 @@ # Optional list of dependencies required by the package dependencies = ['torch'] +# classification from torchvision.models.alexnet import alexnet from torchvision.models.densenet import densenet121, densenet169, densenet201, densenet161 from torchvision.models.inception import inception_v3 @@ -8,7 +9,13 @@ resnext50_32x4d, resnext101_32x8d, wide_resnet50_2, wide_resnet101_2 from torchvision.models.squeezenet import squeezenet1_0, squeezenet1_1 from torchvision.models.vgg import vgg11, vgg13, vgg16, vgg19, vgg11_bn, vgg13_bn, vgg16_bn, vgg19_bn -from torchvision.models.segmentation import fcn_resnet101, deeplabv3_resnet101 from torchvision.models.googlenet import googlenet from torchvision.models.shufflenetv2 import shufflenet_v2_x0_5, shufflenet_v2_x1_0 -from torchvision.models.mobilenet import mobilenet_v2 +from torchvision.models.mobilenetv2 import mobilenet_v2 +from torchvision.models.mobilenetv3 import mobilenet_v3_large, mobilenet_v3_small +from torchvision.models.mnasnet import mnasnet0_5, mnasnet0_75, mnasnet1_0, \ + mnasnet1_3 + +# segmentation +from torchvision.models.segmentation import fcn_resnet50, fcn_resnet101, \ + deeplabv3_resnet50, deeplabv3_resnet101, deeplabv3_mobilenet_v3_large, lraspp_mobilenet_v3_large diff --git a/ios/CMakeLists.txt b/ios/CMakeLists.txt new file mode 100644 index 00000000000..6b9fd3925b2 --- /dev/null +++ b/ios/CMakeLists.txt @@ -0,0 +1,23 @@ +cmake_minimum_required(VERSION 3.4.1) +set(TARGET torchvision_ops) +project(${TARGET} CXX) +set(CMAKE_CXX_STANDARD 14) +set(LIBTORCH_HEADER_ROOT ${LIBTORCH_HEADER_ROOT}) +set(LIBRARY_OUTPUT_PATH ../lib) + +file(GLOB VISION_SRCS + ../torchvision/csrc/ops/cpu/*.h + ../torchvision/csrc/ops/cpu/*.cpp + ../torchvision/csrc/ops/*.h + ../torchvision/csrc/ops/*.cpp) + +add_library(${TARGET} STATIC + ${VISION_SRCS} +) + +file(GLOB PYTORCH_HEADERS "${LIBTORCH_HEADER_ROOT}") +file(GLOB PYTORCH_HEADERS_CSRC "${LIBTORCH_HEADER_ROOT}/torch/csrc/api/include") +target_include_directories(${TARGET} PRIVATE + ${PYTORCH_HEADERS} + ${PYTORCH_HEADERS_CSRC} +) diff --git a/ios/build_ios.sh b/ios/build_ios.sh new file mode 100755 index 00000000000..81ac2f2a218 --- /dev/null +++ b/ios/build_ios.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +set -ex -o pipefail +echo "" +echo "DIR: $(pwd)" +VISION_IOS_ROOT=$(dirname $(realpath $0)) + +if ! [ -n "${LIBTORCH_HEADER_ROOT:-}" ]; then + echo "Missing parameter: LIBTORCH_HEADER_ROOT" + exit 1 +fi + +if [ -n "${IOS_ARCH:-}" ]; then + if [ "${IOS_ARCH:-}" == "arm64" ]; then + IOS_PLATFORM="OS" + elif [ "${IOS_ARCH:-}" == "x86_64" ]; then + IOS_PLATFORM="SIMULATOR" + fi +fi + +mkdir -p ${VISION_IOS_ROOT}/lib +mkdir -p ${VISION_IOS_ROOT}/build +cd ${VISION_IOS_ROOT}/build +cmake -DLIBTORCH_HEADER_ROOT=${LIBTORCH_HEADER_ROOT} \ + -DCMAKE_TOOLCHAIN_FILE=${VISION_IOS_ROOT}/../cmake/iOS.cmake \ + -DIOS_ARCH=${IOS_ARCH} \ + -DIOS_PLATFORM=${IOS_PLATFORM} \ + .. +make +rm -rf ${VISION_IOS_ROOT}/build diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 00000000000..040b52dfda4 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,69 @@ +[mypy] + +files = torchvision +show_error_codes = True +pretty = True + +[mypy-torchvision.io._video_opt.*] + +ignore_errors = True + +[mypy-torchvision.io.*] + +ignore_errors = True + +[mypy-torchvision.models.densenet.*] + +ignore_errors=True + +[mypy-torchvision.models.detection.*] + +ignore_errors = True + +[mypy-torchvision.models.quantization.*] + +ignore_errors = True + +[mypy-torchvision.ops.*] + +ignore_errors = True + +[mypy-torchvision.transforms.*] + +ignore_errors = True + +[mypy-PIL.*] + +ignore_missing_imports = True + +[mypy-numpy.*] + +ignore_missing_imports = True + +[mypy-scipy.*] + +ignore_missing_imports = True + +[mypy-pycocotools.*] + +ignore_missing_imports = True + +[mypy-lmdb.*] + +ignore_missing_imports = True + +[mypy-pandas.*] + +ignore_missing_imports = True + +[mypy-accimage.*] + +ignore_missing_imports = True + +[mypy-av.*] + +ignore_missing_imports = True + +[mypy-defusedxml.*] + +ignore_missing_imports = True diff --git a/packaging/build_cmake.sh b/packaging/build_cmake.sh new file mode 100755 index 00000000000..da758f4b7dc --- /dev/null +++ b/packaging/build_cmake.sh @@ -0,0 +1,106 @@ +#!/bin/bash +set -ex + +PARALLELISM=8 +if [ -n "$MAX_JOBS" ]; then + PARALLELISM=$MAX_JOBS +fi + +if [[ "$(uname)" != Darwin && "$OSTYPE" != "msys" ]]; then + eval "$(./conda/bin/conda shell.bash hook)" + conda activate ./env +fi + +script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +. "$script_dir/pkg_helpers.bash" + +export BUILD_TYPE=conda +setup_env 0.10.0 +export SOURCE_ROOT_DIR="$PWD" +setup_conda_pytorch_constraint +setup_conda_cudatoolkit_plain_constraint + +if [[ "$OSTYPE" == "msys" ]]; then + conda install -yq conda-build cmake pillow future + pip install dataclasses +fi + +setup_visual_studio_constraint +setup_junit_results_folder + +conda install -yq pytorch=$PYTORCH_VERSION $CONDA_CUDATOOLKIT_CONSTRAINT $CONDA_CPUONLY_FEATURE -c "pytorch-${UPLOAD_CHANNEL}" +TORCH_PATH=$(dirname $(python -c "import torch; print(torch.__file__)")) + +if [[ "$(uname)" == Darwin || "$OSTYPE" == "msys" ]]; then + conda install -yq libpng jpeg +else + yum install -y libpng-devel libjpeg-turbo-devel +fi + +mkdir cpp_build +pushd cpp_build + +# Generate libtorchvision files +cmake .. -DTorch_DIR=$TORCH_PATH/share/cmake/Torch -DWITH_CUDA=$CMAKE_USE_CUDA + +# Compile and install libtorchvision +if [[ "$OSTYPE" == "msys" ]]; then + "$script_dir/windows/internal/vc_env_helper.bat" "$script_dir/windows/internal/build_cmake.bat" $PARALLELISM + CONDA_PATH=$(dirname $(which python)) + cp -r "C:/Program Files (x86)/torchvision/include/torchvision" $CONDA_PATH/include +else + make -j$PARALLELISM + make install + + if [[ "$(uname)" == Darwin ]]; then + CONDA_PATH=$(dirname $(dirname $(which python))) + cp -r /usr/local/include/torchvision $CONDA_PATH/include/ + export C_INCLUDE_PATH=/usr/local/include + export CPLUS_INCLUDE_PATH=/usr/local/include + fi +fi + +popd + +# Install torchvision locally +python setup.py develop + +# Trace, compile and run project that uses Faster-RCNN +pushd test/tracing/frcnn +mkdir build + +# Trace model +python trace_model.py +cp fasterrcnn_resnet50_fpn.pt build + +cd build +cmake .. -DTorch_DIR=$TORCH_PATH/share/cmake/Torch -DWITH_CUDA=$CMAKE_USE_CUDA +if [[ "$OSTYPE" == "msys" ]]; then + "$script_dir/windows/internal/vc_env_helper.bat" "$script_dir/windows/internal/build_frcnn.bat" $PARALLELISM + mv fasterrcnn_resnet50_fpn.pt Release + cd Release + export PATH=$(cygpath "C:/Program Files (x86)/torchvision/bin"):$(cygpath $TORCH_PATH)/lib:$PATH +else + make -j$PARALLELISM +fi + +# Run traced program +./test_frcnn_tracing + +# Compile and run the CPP example +popd +cd examples/cpp/hello_world + +mkdir build +cd build +cmake .. -DTorch_DIR=$TORCH_PATH/share/cmake/Torch + +if [[ "$OSTYPE" == "msys" ]]; then + "$script_dir/windows/internal/vc_env_helper.bat" "$script_dir/windows/internal/build_cpp_example.bat" $PARALLELISM + cd Release +else + make -j$PARALLELISM +fi + +# Run CPP example +./hello-world diff --git a/packaging/build_conda.sh b/packaging/build_conda.sh index aaddf0710c8..5f2239aae7e 100755 --- a/packaging/build_conda.sh +++ b/packaging/build_conda.sh @@ -5,9 +5,10 @@ script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" . "$script_dir/pkg_helpers.bash" export BUILD_TYPE=conda -setup_env 0.5.0 +setup_env 0.10.0 export SOURCE_ROOT_DIR="$PWD" setup_conda_pytorch_constraint setup_conda_cudatoolkit_constraint setup_visual_studio_constraint +setup_junit_results_folder conda build $CONDA_CHANNEL_FLAGS -c defaults -c conda-forge --no-anaconda-upload --python "$PYTHON_VERSION" packaging/torchvision diff --git a/packaging/build_wheel.sh b/packaging/build_wheel.sh index 7d37239563d..72acdf01fbe 100755 --- a/packaging/build_wheel.sh +++ b/packaging/build_wheel.sh @@ -5,11 +5,55 @@ script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" . "$script_dir/pkg_helpers.bash" export BUILD_TYPE=wheel -setup_env 0.5.0 +setup_env 0.10.0 setup_wheel_python pip_install numpy pyyaml future ninja -# TODO remove after https://github.com/pytorch/pytorch/pull/27282 gets merged -pip_install six setup_pip_pytorch_version python setup.py clean -IS_WHEEL=1 python setup.py bdist_wheel + +# Copy binaries to be included in the wheel distribution +if [[ "$(uname)" == Darwin || "$OSTYPE" == "msys" ]]; then + python_exec="$(which python)" + bin_path=$(dirname $python_exec) + env_path=$(dirname $bin_path) + if [[ "$(uname)" == Darwin ]]; then + # Install delocate to relocate the required binaries + pip_install delocate + else + cp "$bin_path/Library/bin/libpng16.dll" torchvision + cp "$bin_path/Library/bin/libjpeg.dll" torchvision + fi +else + # Install auditwheel to get some inspection utilities + pip_install auditwheel + + # Point to custom libraries + export LD_LIBRARY_PATH=$(pwd)/ext_libraries/lib:$LD_LIBRARY_PATH + export TORCHVISION_INCLUDE=$(pwd)/ext_libraries/include + export TORCHVISION_LIBRARY=$(pwd)/ext_libraries/lib +fi + +download_copy_ffmpeg + +if [[ "$OSTYPE" == "msys" ]]; then + IS_WHEEL=1 "$script_dir/windows/internal/vc_env_helper.bat" python setup.py bdist_wheel +else + IS_WHEEL=1 python setup.py bdist_wheel +fi + + +if [[ "$(uname)" == Darwin ]]; then + pushd dist/ + python_exec="$(which python)" + bin_path=$(dirname $python_exec) + env_path=$(dirname $bin_path) + for whl in *.whl; do + DYLD_LIBRARY_PATH="$env_path/lib/:$DYLD_LIBRARY_PATH" delocate-wheel -v $whl + done +else + if [[ "$OSTYPE" == "msys" ]]; then + "$script_dir/windows/internal/vc_env_helper.bat" python $script_dir/wheel/relocate.py + else + LD_LIBRARY_PATH="/usr/local/lib:$LD_LIBRARY_PATH" python $script_dir/wheel/relocate.py + fi +fi diff --git a/packaging/conda/build_vision.sh b/packaging/conda/build_vision.sh deleted file mode 100755 index 000f314670b..00000000000 --- a/packaging/conda/build_vision.sh +++ /dev/null @@ -1,217 +0,0 @@ -#!/usr/bin/env bash -if [[ -x "/remote/anaconda_token" ]]; then - . /remote/anaconda_token || true -fi - -set -ex - -# Function to retry functions that sometimes timeout or have flaky failures -retry () { - $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) -} - -# Parse arguments and determmine version -########################################################### -if [[ -n "$DESIRED_CUDA" && -n "$TORCHVISION_BUILD_VERSION" && -n "$TORCHVISION_BUILD_NUMBER" ]]; then - desired_cuda="$DESIRED_CUDA" - build_version="$PYTORCH_BUILD_VERSION" - build_number="$PYTORCH_BUILD_NUMBER" -else - if [ "$#" -ne 3 ]; then - echo "Illegal number of parameters. Pass cuda version, pytorch version, build number" - echo "CUDA version should be Mm with no dot, e.g. '80'" - echo "DESIRED_PYTHON should be M.m, e.g. '2.7'" - exit 1 - fi - - desired_cuda="$1" - build_version="$2" - build_number="$3" -fi -if [[ "$desired_cuda" != cpu ]]; then - desired_cuda="$(echo $desired_cuda | tr -d cuda. )" -fi -echo "Building cuda version $desired_cuda and torchvision version: $build_version build_number: $build_number" - -if [[ "$desired_cuda" == 'cpu' ]]; then - cpu_only=1 - cuver="cpu" -else - # Switch desired_cuda to be M.m to be consistent with other scripts in - # pytorch/builder - export FORCE_CUDA=1 - cuda_nodot="$desired_cuda" - - if [[ ${#cuda_nodot} -eq 2 ]]; then - desired_cuda="${desired_cuda:0:1}.${desired_cuda:1:1}" - elif [[ ${#cuda_nodot} -eq 3 ]]; then - desired_cuda="${desired_cuda:0:2}.${desired_cuda:2:1}" - else - echo "unknown cuda version $cuda_nodot" - exit 1 - fi - - cuver="cu$cuda_nodot" -fi - -export TORCHVISION_BUILD_VERSION=$build_version -export TORCHVISION_BUILD_NUMBER=$build_number - -if [[ -z "$DESIRED_PYTHON" ]]; then - DESIRED_PYTHON=('3.5' '3.6' '3.7') -fi - -SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" - -if [[ -z "$WIN_PACKAGE_WORK_DIR" ]]; then - WIN_PACKAGE_WORK_DIR="$(echo $(pwd -W) | tr '/' '\\')\\tmp_conda_$(date +%H%M%S)" -fi - -mkdir -p "$WIN_PACKAGE_WORK_DIR" || true -vision_rootdir="$(realpath ${WIN_PACKAGE_WORK_DIR})/torchvision-src" -git config --system core.longpaths true - -if [[ ! -d "$vision_rootdir" ]]; then - rm -rf "$vision_rootdir" - git clone "https://github.com/pytorch/vision" "$vision_rootdir" - pushd "$vision_rootdir" - git checkout $PYTORCH_BRANCH - popd -fi - -cd "$SOURCE_DIR" - -export tmp_conda="${WIN_PACKAGE_WORK_DIR}\\conda" -export miniconda_exe="${WIN_PACKAGE_WORK_DIR}\\miniconda.exe" -rm -rf "$tmp_conda" -rm -f "$miniconda_exe" -curl -sSk https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe -o "$miniconda_exe" -"$SOURCE_DIR/install_conda.bat" && rm "$miniconda_exe" -pushd $tmp_conda -export PATH="$(pwd):$(pwd)/Library/usr/bin:$(pwd)/Library/bin:$(pwd)/Scripts:$(pwd)/bin:$PATH" -popd -retry conda install -yq conda-build - -ANACONDA_USER=pytorch-nightly -conda config --set anaconda_upload no - - -export TORCHVISION_PACKAGE_SUFFIX="" -if [[ "$desired_cuda" == 'cpu' ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="" - export CONDA_CPUONLY_FEATURE="- cpuonly # [not osx]" - export CUDA_VERSION="None" -else - export CONDA_CPUONLY_FEATURE="" - . ./switch_cuda_version.sh $desired_cuda - if [[ "$desired_cuda" == "10.1" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.1,<10.2 # [not osx]" - elif [[ "$desired_cuda" == "10.0" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.0,<10.1 # [not osx]" - elif [[ "$desired_cuda" == "9.2" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=9.2,<9.3 # [not osx]" - elif [[ "$desired_cuda" == "9.0" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=9.0,<9.1 # [not osx]" - elif [[ "$desired_cuda" == "8.0" ]]; then - export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=8.0,<8.1 # [not osx]" - else - echo "unhandled desired_cuda: $desired_cuda" - exit 1 - fi -fi - -if [[ -z "$PYTORCH_VERSION" ]]; then - export CONDA_CHANNEL_FLAGS="-c pytorch-nightly" - export PYTORCH_VERSION="$(conda search --json 'pytorch[channel=pytorch-nightly]' | \ - python -c "import os, sys, json, re; cuver = '$cuver'; \ - cuver = cuver.replace('cu', 'cuda') if cuver != 'cpu' else cuver; \ - print(re.sub(r'\\+.*$', '', \ - [x['version'] for x in json.load(sys.stdin)['pytorch'] \ - if (x['platform'] == 'darwin' or cuver in x['fn']) \ - and 'py' + os.environ['DESIRED_PYTHON'] in x['fn']][-1]))")" - if [[ -z "$PYTORCH_VERSION" ]]; then - echo "PyTorch version auto detection failed" - echo "No package found for desired_cuda=$desired_cuda and DESIRED_PYTHON=$DESIRED_PYTHON" - exit 1 - fi -else - export CONDA_CHANNEL_FLAGS="-c pytorch -c pytorch-nightly" -fi -if [[ "$desired_cuda" == 'cpu' ]]; then - export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==$PYTORCH_VERSION" - export CONDA_PYTORCH_CONSTRAINT="- pytorch==$PYTORCH_VERSION" -else - export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==${PYTORCH_VERSION}" - export CONDA_PYTORCH_CONSTRAINT="- pytorch==${PYTORCH_VERSION}" -fi - -# Loop through all Python versions to build a package for each -for py_ver in "${DESIRED_PYTHON[@]}"; do - build_string="py${py_ver}_${build_string_suffix}" - folder_tag="${build_string}_$(date +'%Y%m%d')" - - # Create the conda package into this temporary folder. This is so we can find - # the package afterwards, as there's no easy way to extract the final filename - # from conda-build - output_folder="out_$folder_tag" - rm -rf "$output_folder" - mkdir "$output_folder" - - export VSTOOLCHAIN_PACKAGE=vs2017 - - # We need to build the compiler activation scripts first on Windows - time VSDEVCMD_ARGS=${VSDEVCMD_ARGS[@]} \ - conda build -c "$ANACONDA_USER" \ - --no-anaconda-upload \ - --output-folder "$output_folder" \ - ../$VSTOOLCHAIN_PACKAGE - - cp ../$VSTOOLCHAIN_PACKAGE/conda_build_config.yaml ../torchvision/conda_build_config.yaml - - conda config --set anaconda_upload no - echo "Calling conda-build at $(date)" - if [[ "$desired_cuda" == "9.2" ]]; then - time CMAKE_ARGS=${CMAKE_ARGS[@]} \ - BUILD_VERSION="$TORCHVISION_BUILD_VERSION" \ - CU_VERSION="$cuver" \ - SOURCE_ROOT_DIR="$vision_rootdir" \ - conda build -c "$ANACONDA_USER" \ - -c defaults \ - -c conda-forge \ - -c "numba/label/dev" \ - --no-anaconda-upload \ - --python "$py_ver" \ - --output-folder "$output_folder" \ - --no-verify \ - --no-test \ - ../torchvision - else - time CMAKE_ARGS=${CMAKE_ARGS[@]} \ - BUILD_VERSION="$TORCHVISION_BUILD_VERSION" \ - CU_VERSION="$cuver" \ - SOURCE_ROOT_DIR="$vision_rootdir" \ - conda build -c "$ANACONDA_USER" \ - -c defaults \ - -c conda-forge \ - --no-anaconda-upload \ - --python "$py_ver" \ - --output-folder "$output_folder" \ - --no-verify \ - --no-test \ - ../torchvision - fi - echo "Finished conda-build at $(date)" - - # Extract the package for testing - ls -lah "$output_folder" - built_package="$(find $output_folder/ -name '*torchvision*.tar.bz2')" - - # Copy the built package to the host machine for persistence before testing - if [[ -n "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then - mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true - cp "$built_package" "$PYTORCH_FINAL_PACKAGE_DIR/" - fi -done - - -set +e diff --git a/packaging/conda/switch_cuda_version.sh b/packaging/conda/switch_cuda_version.sh deleted file mode 100755 index 342def93899..00000000000 --- a/packaging/conda/switch_cuda_version.sh +++ /dev/null @@ -1,28 +0,0 @@ -if [[ "$OSTYPE" == "msys" ]]; then - CUDA_DIR="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v$1" -else - CUDA_DIR="/usr/local/cuda-$1" -fi - -if ! ls "$CUDA_DIR" -then - echo "folder $CUDA_DIR not found to switch" -fi - -echo "Switching symlink to $CUDA_DIR" -mkdir -p /usr/local -rm -fr /usr/local/cuda -ln -s "$CUDA_DIR" /usr/local/cuda - -if [[ "$OSTYPE" == "msys" ]]; then - export CUDA_VERSION=`ls /usr/local/cuda/bin/cudart64*.dll | head -1 | tr '._' ' ' | cut -d ' ' -f2` - export CUDNN_VERSION=`ls /usr/local/cuda/bin/cudnn64*.dll | head -1 | tr '._' ' ' | cut -d ' ' -f2` -else - export CUDA_VERSION=$(ls /usr/local/cuda/lib64/libcudart.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev) - export CUDNN_VERSION=$(ls /usr/local/cuda/lib64/libcudnn.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev) -fi - -ls -alh /usr/local/cuda - -echo "CUDA_VERSION=$CUDA_VERSION" -echo "CUDNN_VERSION=$CUDNN_VERSION" diff --git a/packaging/pkg_helpers.bash b/packaging/pkg_helpers.bash index 5d7109efe93..826fb525e3a 100644 --- a/packaging/pkg_helpers.bash +++ b/packaging/pkg_helpers.bash @@ -36,7 +36,7 @@ setup_cuda() { # Wheel builds need suffixes (but not if they're on OS X, which never has suffix) if [[ "$BUILD_TYPE" == "wheel" ]] && [[ "$(uname)" != Darwin ]]; then # The default CUDA has no suffix - if [[ "$CU_VERSION" != "cu101" ]]; then + if [[ "$CU_VERSION" != "cu102" ]]; then export PYTORCH_VERSION_SUFFIX="+$CU_VERSION" fi # Match the suffix scheme of pytorch, unless this package does not have @@ -49,6 +49,42 @@ setup_cuda() { # Now work out the CUDA settings case "$CU_VERSION" in + cu112) + if [[ "$OSTYPE" == "msys" ]]; then + export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.2" + else + export CUDA_HOME=/usr/local/cuda-11.2/ + fi + export FORCE_CUDA=1 + export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" + ;; + cu111) + if [[ "$OSTYPE" == "msys" ]]; then + export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.1" + else + export CUDA_HOME=/usr/local/cuda-11.1/ + fi + export FORCE_CUDA=1 + export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" + ;; + cu110) + if [[ "$OSTYPE" == "msys" ]]; then + export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.0" + else + export CUDA_HOME=/usr/local/cuda-11.0/ + fi + export FORCE_CUDA=1 + export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0" + ;; + cu102) + if [[ "$OSTYPE" == "msys" ]]; then + export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" + else + export CUDA_HOME=/usr/local/cuda-10.2/ + fi + export FORCE_CUDA=1 + export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5" + ;; cu101) if [[ "$OSTYPE" == "msys" ]]; then export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.1" @@ -56,9 +92,7 @@ setup_cuda() { export CUDA_HOME=/usr/local/cuda-10.1/ fi export FORCE_CUDA=1 - # Hard-coding gencode flags is temporary situation until - # https://github.com/pytorch/pytorch/pull/23408 lands - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50" + export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5" ;; cu100) if [[ "$OSTYPE" == "msys" ]]; then @@ -67,9 +101,7 @@ setup_cuda() { export CUDA_HOME=/usr/local/cuda-10.0/ fi export FORCE_CUDA=1 - # Hard-coding gencode flags is temporary situation until - # https://github.com/pytorch/pytorch/pull/23408 lands - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50" + export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5" ;; cu92) if [[ "$OSTYPE" == "msys" ]]; then @@ -78,10 +110,13 @@ setup_cuda() { export CUDA_HOME=/usr/local/cuda-9.2/ fi export FORCE_CUDA=1 - export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50" + export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0" ;; cpu) ;; + rocm*) + export FORCE_CUDA=1 + ;; *) echo "Unrecognized CU_VERSION=$CU_VERSION" exit 1 @@ -106,6 +141,12 @@ setup_build_version() { else export BUILD_VERSION="$BUILD_VERSION$VERSION_SUFFIX" fi + + # Set build version based on tag if on tag + if [[ -n "${CIRCLE_TAG}" ]]; then + # Strip tag + export BUILD_VERSION="$(echo "${CIRCLE_TAG}" | sed -e 's/^v//' -e 's/-.*$//')${VERSION_SUFFIX}" + fi } # Set some useful variables for OS X, if applicable @@ -115,6 +156,7 @@ setup_macos() { fi } + # Top-level entry point for things every package will need to do # # Usage: setup_env 0.2.0 @@ -138,12 +180,19 @@ retry () { # # Precondition: If Linux, you are in a soumith/manylinux-cuda* Docker image setup_wheel_python() { - if [[ "$(uname)" == Darwin ]]; then + if [[ "$(uname)" == Darwin || "$OSTYPE" == "msys" ]]; then eval "$(conda shell.bash hook)" conda env remove -n "env$PYTHON_VERSION" || true - conda create -yn "env$PYTHON_VERSION" python="$PYTHON_VERSION" + if [[ "$PYTHON_VERSION" == 3.9 ]]; then + export CONDA_CHANNEL_FLAGS="${CONDA_CHANNEL_FLAGS} -c=conda-forge" + fi + conda create ${CONDA_CHANNEL_FLAGS} -yn "env$PYTHON_VERSION" python="$PYTHON_VERSION" conda activate "env$PYTHON_VERSION" + # Install libpng from Anaconda (defaults) + conda install ${CONDA_CHANNEL_FLAGS} -c conda-forge libpng "jpeg<=9b" -y else + # Install native CentOS libJPEG, LAME, freetype and GnuTLS + yum install -y libjpeg-turbo-devel lame freetype gnutls case "$PYTHON_VERSION" in 2.7) if [[ -n "$UNICODE_ABI" ]]; then @@ -155,12 +204,20 @@ setup_wheel_python() { 3.5) python_abi=cp35-cp35m ;; 3.6) python_abi=cp36-cp36m ;; 3.7) python_abi=cp37-cp37m ;; + 3.8) python_abi=cp38-cp38 ;; + 3.9) python_abi=cp39-cp39 ;; *) echo "Unrecognized PYTHON_VERSION=$PYTHON_VERSION" exit 1 ;; esac - export PATH="/opt/python/$python_abi/bin:$PATH" + # Download all the dependencies required to compile image and video_reader + # extensions + + mkdir -p ext_libraries + pushd ext_libraries + popd + export PATH="/opt/python/$python_abi/bin:$(pwd)/ext_libraries/bin:$PATH" fi } @@ -184,19 +241,19 @@ setup_pip_pytorch_version() { export PYTORCH_VERSION="$(pip show torch | grep ^Version: | sed 's/Version: *//')" fi else - pip_install "torch==$PYTORCH_VERSION$CUDA_SUFFIX" \ - -f https://download.pytorch.org/whl/torch_stable.html \ - -f https://download.pytorch.org/whl/nightly/torch_nightly.html + pip_install "torch==$PYTORCH_VERSION$PYTORCH_VERSION_SUFFIX" \ + -f "https://download.pytorch.org/whl/${CU_VERSION}/torch_stable.html" \ + -f "https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/${CU_VERSION}/torch_${UPLOAD_CHANNEL}.html" fi } # Fill PYTORCH_VERSION with the latest conda nightly version, and # CONDA_CHANNEL_FLAGS with appropriate flags to retrieve these versions # -# You MUST have populated CUDA_SUFFIX before hand. +# You MUST have populated PYTORCH_VERSION_SUFFIX before hand. setup_conda_pytorch_constraint() { if [[ -z "$PYTORCH_VERSION" ]]; then - export CONDA_CHANNEL_FLAGS="-c pytorch-nightly" + export CONDA_CHANNEL_FLAGS="-c pytorch-nightly -c pytorch" export PYTORCH_VERSION="$(conda search --json 'pytorch[channel=pytorch-nightly]' | \ python -c "import os, sys, json, re; cuver = os.environ.get('CU_VERSION'); \ cuver_1 = cuver.replace('cu', 'cuda') if cuver != 'cpu' else cuver; \ @@ -211,7 +268,7 @@ setup_conda_pytorch_constraint() { exit 1 fi else - export CONDA_CHANNEL_FLAGS="-c pytorch -c pytorch-nightly" + export CONDA_CHANNEL_FLAGS="-c pytorch -c pytorch-${UPLOAD_CHANNEL}" fi if [[ "$CU_VERSION" == cpu ]]; then export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==$PYTORCH_VERSION${PYTORCH_VERSION_SUFFIX}" @@ -220,6 +277,12 @@ setup_conda_pytorch_constraint() { export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==${PYTORCH_VERSION}${PYTORCH_VERSION_SUFFIX}" export CONDA_PYTORCH_CONSTRAINT="- pytorch==${PYTORCH_VERSION}${PYTORCH_VERSION_SUFFIX}" fi + if [[ "$OSTYPE" == msys && "$CU_VERSION" == cu92 ]]; then + export CONDA_CHANNEL_FLAGS="${CONDA_CHANNEL_FLAGS} -c defaults -c numba/label/dev" + fi + if [[ "$PYTHON_VERSION" == 3.9 ]]; then + export CONDA_CHANNEL_FLAGS="${CONDA_CHANNEL_FLAGS} -c=conda-forge" + fi } # Translate CUDA_VERSION into CUDA_CUDATOOLKIT_CONSTRAINT @@ -229,6 +292,18 @@ setup_conda_cudatoolkit_constraint() { export CONDA_CUDATOOLKIT_CONSTRAINT="" else case "$CU_VERSION" in + cu112) + export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.2,<11.3 # [not osx]" + ;; + cu111) + export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.1,<11.2 # [not osx]" + ;; + cu110) + export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.0,<11.1 # [not osx]" + ;; + cu102) + export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.2,<10.3 # [not osx]" + ;; cu101) export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.1,<10.2 # [not osx]" ;; @@ -250,12 +325,81 @@ setup_conda_cudatoolkit_constraint() { fi } +setup_conda_cudatoolkit_plain_constraint() { + export CONDA_CPUONLY_FEATURE="" + export CMAKE_USE_CUDA=1 + if [[ "$(uname)" == Darwin ]]; then + export CONDA_CUDATOOLKIT_CONSTRAINT="" + export CMAKE_USE_CUDA=0 + else + case "$CU_VERSION" in + cu112) + export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit=11.2" + ;; + cu111) + export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit=11.1" + ;; + cu102) + export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit=10.2" + ;; + cu101) + export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit=10.1" + ;; + cu100) + export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit=10.0" + ;; + cu92) + export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit=9.2" + ;; + cpu) + export CONDA_CUDATOOLKIT_CONSTRAINT="" + export CONDA_CPUONLY_FEATURE="cpuonly" + export CMAKE_USE_CUDA=0 + ;; + *) + echo "Unrecognized CU_VERSION=$CU_VERSION" + exit 1 + ;; + esac + fi +} + # Build the proper compiler package before building the final package setup_visual_studio_constraint() { if [[ "$OSTYPE" == "msys" ]]; then - export VSTOOLCHAIN_PACKAGE=vs2019 - export VSDEVCMD_ARGS='' + export VSTOOLCHAIN_PACKAGE=vs$VC_YEAR conda build $CONDA_CHANNEL_FLAGS --no-anaconda-upload packaging/$VSTOOLCHAIN_PACKAGE cp packaging/$VSTOOLCHAIN_PACKAGE/conda_build_config.yaml packaging/torchvision/conda_build_config.yaml fi } + +setup_junit_results_folder() { + if [[ "$CI" == "true" ]]; then + export CONDA_PYTORCH_BUILD_RESULTS_DIRECTORY="${SOURCE_ROOT_DIR}/build_results/results.xml" + fi +} + + +download_copy_ffmpeg() { + if [[ "$OSTYPE" == "msys" ]]; then + # conda install -yq ffmpeg=4.2 -c pytorch + # curl -L -q https://anaconda.org/pytorch/ffmpeg/4.3/download/win-64/ffmpeg-4.3-ha925a31_0.tar.bz2 --output ffmpeg-4.3-ha925a31_0.tar.bz2 + # bzip2 --decompress --stdout ffmpeg-4.3-ha925a31_0.tar.bz2 | tar -x --file=- + # cp Library/bin/*.dll ../torchvision + echo "FFmpeg is disabled currently on Windows" + else + if [[ "$(uname)" == Darwin ]]; then + conda install -yq ffmpeg=4.2 -c pytorch + conda install -yq wget + else + # pushd ext_libraries + # wget -q https://anaconda.org/pytorch/ffmpeg/4.2/download/linux-64/ffmpeg-4.2-hf484d3e_0.tar.bz2 + # tar -xjvf ffmpeg-4.2-hf484d3e_0.tar.bz2 + # rm -rf ffmpeg-4.2-hf484d3e_0.tar.bz2 + # ldconfig + # which ffmpeg + # popd + echo "FFmpeg is disabled currently on Linux" + fi + fi +} diff --git a/packaging/torchvision/bld.bat b/packaging/torchvision/bld.bat deleted file mode 100644 index 73f217c2cf1..00000000000 --- a/packaging/torchvision/bld.bat +++ /dev/null @@ -1,26 +0,0 @@ -@echo on - -set TORCHVISION_BUILD_VERSION=%PKG_VERSION% -set TORCHVISION_BUILD_NUMBER=%PKG_BUILDNUM% - -set build_with_cuda= - -if "%CUDA_VERSION%" == "None" goto cuda_flags_end -if "%CUDA_VERSION%" == "cpu" goto cuda_flags_end -if "%CUDA_VERSION%" == "" goto cuda_flags_end - -set build_with_cuda=1 -set desired_cuda=%CUDA_VERSION:~0,-1%.%CUDA_VERSION:~-1,1% - -set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%desired_cuda% -set CUDA_BIN_PATH=%CUDA_PATH%\bin -set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -if "%desired_cuda%" == "9.0" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50 -if "%desired_cuda%" == "9.2" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_61,code=sm_61 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50 -if "%desired_cuda%" == "10.0" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 -if "%desired_cuda%" == "10.1" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 - -:cuda_flags_end - -python setup.py install --single-version-externally-managed --record=record.txt -if errorlevel 1 exit /b 1 diff --git a/packaging/torchvision/conda_build_config.yaml b/packaging/torchvision/conda_build_config.yaml index 5188bb0ebec..257515c8b70 100644 --- a/packaging/torchvision/conda_build_config.yaml +++ b/packaging/torchvision/conda_build_config.yaml @@ -1,3 +1,5 @@ +channel_sources: + - pytorch-nightly,pytorch,defaults blas_impl: - mkl # [x86_64] c_compiler: diff --git a/packaging/torchvision/meta.yaml b/packaging/torchvision/meta.yaml index da075ff03cb..8516b2f0ed4 100644 --- a/packaging/torchvision/meta.yaml +++ b/packaging/torchvision/meta.yaml @@ -8,6 +8,10 @@ source: requirements: build: - {{ compiler('c') }} # [win] + - libpng + - jpeg + # NOTE: The only ffmpeg version that we build is actually 4.2 + - ffmpeg >=4.2 # [not win] host: - python @@ -18,19 +22,21 @@ requirements: run: - python - - pillow >=4.1.1 - - numpy >=1.11 - - six + - libpng + - ffmpeg >=4.2 # [not win] + - jpeg + - pillow >=5.3.0 {{ environ.get('CONDA_PYTORCH_CONSTRAINT') }} {{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT') }} build: string: py{{py}}_{{ environ['CU_VERSION'] }} - script: python setup.py install --single-version-externally-managed --record=record.txt # [not win] + script: python setup.py install --single-version-externally-managed --record=record.txt script_env: - CUDA_HOME - FORCE_CUDA - - NVCC_FLAGS + - BUILD_VERSION + - TORCH_CUDA_ARCH_LIST features: {{ environ.get('CONDA_CPUONLY_FEATURE') }} @@ -44,12 +50,9 @@ test: requires: - pytest - scipy - - mock - - av + - av >=8.0.1 + - jpeg - ca-certificates - - typing - commands: - pytest . about: diff --git a/packaging/vs2017/meta.yaml b/packaging/vs2017/meta.yaml index 34f4860ba85..1f569525ee1 100644 --- a/packaging/vs2017/meta.yaml +++ b/packaging/vs2017/meta.yaml @@ -19,27 +19,6 @@ outputs: # VS 2017 is binary-compatible with VS 2015/vc14. Tools are "v141". strong: - vc{{ vcfeature }} - run_exports: - - vc {{ vcver }} about: summary: Activation and version verification of MSVC {{ vcver }} (VS {{ vsyear }}) compiler license: BSD 3-clause - - name: vs{{ vsyear }}_runtime - script: install_runtime.bat - - name: vc - version: {{ vcver }} - track_features: - - vc{{ vcfeature }} - requirements: - run: - - {{ pin_subpackage('vs' ~ vsyear ~ '_runtime') }} - about: - home: https://github.com/conda/conda/wiki/VC-features - license: Modified BSD License (3-clause) - license_family: BSD - summary: A meta-package to track VC features. - description: | - This metapackage is used to activate vc features without - depending on Python. - doc_url: https://github.com/conda/conda/wiki/VC-features - dev_url: https://github.com/conda/conda/wiki/VC-features diff --git a/packaging/vs2019/meta.yaml b/packaging/vs2019/meta.yaml index e3f8b471481..94a0ed4db3e 100644 --- a/packaging/vs2019/meta.yaml +++ b/packaging/vs2019/meta.yaml @@ -19,27 +19,6 @@ outputs: # VS 2019 is binary-compatible with VS 2017/vc 14.1 and 2015/vc14. Tools are "v142". strong: - vc{{ vcfeature }} - run_exports: - - vc {{ vcver }} about: summary: Activation and version verification of MSVC {{ vcver }} (VS {{ vsyear }}) compiler license: BSD 3-clause - - name: vs{{ vsyear }}_runtime - script: install_runtime.bat - - name: vc - version: {{ vcver }} - track_features: - - vc{{ vcfeature }} - requirements: - run: - - {{ pin_subpackage('vs' ~ vsyear ~ '_runtime') }} - about: - home: https://github.com/conda/conda/wiki/VC-features - license: Modified BSD License (3-clause) - license_family: BSD - summary: A meta-package to track VC features. - description: | - This metapackage is used to activate vc features without - depending on Python. - doc_url: https://github.com/conda/conda/wiki/VC-features - dev_url: https://github.com/conda/conda/wiki/VC-features diff --git a/packaging/wheel/linux_manywheel.sh b/packaging/wheel/linux_manywheel.sh index d04e334d237..19e7d1a7500 100644 --- a/packaging/wheel/linux_manywheel.sh +++ b/packaging/wheel/linux_manywheel.sh @@ -6,9 +6,9 @@ if [ "$#" -ne 1 ]; then echo "CUDA version should be cu92, cu100 or cpu" exit 1 fi -export CUVER="$1" # cu92 cu100 cpu +export CUVER="$1" # cu[0-9]* cpu -if [[ "$CUVER" == "cu101" ]]; then +if [[ "$CUVER" == "cu102" ]]; then cu_suffix="" else cu_suffix="+$CUVER" diff --git a/packaging/wheel/relocate.py b/packaging/wheel/relocate.py new file mode 100644 index 00000000000..dd2c5d2a4ce --- /dev/null +++ b/packaging/wheel/relocate.py @@ -0,0 +1,417 @@ +# -*- coding: utf-8 -*- + +"""Helper script to package wheels and relocate binaries.""" + +# Standard library imports +import os +import io +import sys +import glob +import shutil +import zipfile +import hashlib +import platform +import subprocess +import os.path as osp +from base64 import urlsafe_b64encode + +# Third party imports +if sys.platform == 'linux': + from auditwheel.lddtree import lddtree +from wheel.bdist_wheel import get_abi_tag + + +ALLOWLIST = { + 'libgcc_s.so.1', 'libstdc++.so.6', 'libm.so.6', + 'libdl.so.2', 'librt.so.1', 'libc.so.6', + 'libnsl.so.1', 'libutil.so.1', 'libpthread.so.0', + 'libresolv.so.2', 'libX11.so.6', 'libXext.so.6', + 'libXrender.so.1', 'libICE.so.6', 'libSM.so.6', + 'libGL.so.1', 'libgobject-2.0.so.0', 'libgthread-2.0.so.0', + 'libglib-2.0.so.0', 'ld-linux-x86-64.so.2', 'ld-2.17.so' +} + +WINDOWS_ALLOWLIST = { + 'MSVCP140.dll', 'KERNEL32.dll', + 'VCRUNTIME140_1.dll', 'VCRUNTIME140.dll', + 'api-ms-win-crt-heap-l1-1-0.dll', + 'api-ms-win-crt-runtime-l1-1-0.dll', + 'api-ms-win-crt-stdio-l1-1-0.dll', + 'api-ms-win-crt-filesystem-l1-1-0.dll', + 'api-ms-win-crt-string-l1-1-0.dll', + 'api-ms-win-crt-environment-l1-1-0.dll', + 'api-ms-win-crt-math-l1-1-0.dll', + 'api-ms-win-crt-convert-l1-1-0.dll' +} + + +HERE = osp.dirname(osp.abspath(__file__)) +PACKAGE_ROOT = osp.dirname(osp.dirname(HERE)) +PLATFORM_ARCH = platform.machine() +PYTHON_VERSION = sys.version_info + + +def read_chunks(file, size=io.DEFAULT_BUFFER_SIZE): + """Yield pieces of data from a file-like object until EOF.""" + while True: + chunk = file.read(size) + if not chunk: + break + yield chunk + + +def rehash(path, blocksize=1 << 20): + """Return (hash, length) for path using hashlib.sha256()""" + h = hashlib.sha256() + length = 0 + with open(path, 'rb') as f: + for block in read_chunks(f, size=blocksize): + length += len(block) + h.update(block) + digest = 'sha256=' + urlsafe_b64encode( + h.digest() + ).decode('latin1').rstrip('=') + # unicode/str python2 issues + return (digest, str(length)) # type: ignore + + +def unzip_file(file, dest): + """Decompress zip `file` into directory `dest`.""" + with zipfile.ZipFile(file, 'r') as zip_ref: + zip_ref.extractall(dest) + + +def is_program_installed(basename): + """ + Return program absolute path if installed in PATH. + Otherwise, return None + On macOS systems, a .app is considered installed if + it exists. + """ + if (sys.platform == 'darwin' and basename.endswith('.app') and + osp.exists(basename)): + return basename + + for path in os.environ["PATH"].split(os.pathsep): + abspath = osp.join(path, basename) + if osp.isfile(abspath): + return abspath + + +def find_program(basename): + """ + Find program in PATH and return absolute path + Try adding .exe or .bat to basename on Windows platforms + (return None if not found) + """ + names = [basename] + if os.name == 'nt': + # Windows platforms + extensions = ('.exe', '.bat', '.cmd', '.dll') + if not basename.endswith(extensions): + names = [basename + ext for ext in extensions] + [basename] + for name in names: + path = is_program_installed(name) + if path: + return path + + +def patch_new_path(library_path, new_dir): + library = osp.basename(library_path) + name, *rest = library.split('.') + rest = '.'.join(rest) + hash_id = hashlib.sha256(library_path.encode('utf-8')).hexdigest()[:8] + new_name = '.'.join([name, hash_id, rest]) + return osp.join(new_dir, new_name) + + +def find_dll_dependencies(dumpbin, binary): + out = subprocess.run([dumpbin, "/dependents", binary], + stdout=subprocess.PIPE) + out = out.stdout.strip().decode('utf-8') + start_index = out.find('dependencies:') + len('dependencies:') + end_index = out.find('Summary') + dlls = out[start_index:end_index].strip() + dlls = dlls.split(os.linesep) + dlls = [dll.strip() for dll in dlls] + return dlls + + +def relocate_elf_library(patchelf, output_dir, output_library, binary): + """ + Relocate an ELF shared library to be packaged on a wheel. + + Given a shared library, find the transitive closure of its dependencies, + rename and copy them into the wheel while updating their respective rpaths. + """ + + print('Relocating {0}'.format(binary)) + binary_path = osp.join(output_library, binary) + + ld_tree = lddtree(binary_path) + tree_libs = ld_tree['libs'] + + binary_queue = [(n, binary) for n in ld_tree['needed']] + binary_paths = {binary: binary_path} + binary_dependencies = {} + + while binary_queue != []: + library, parent = binary_queue.pop(0) + library_info = tree_libs[library] + print(library) + + if library_info['path'] is None: + print('Omitting {0}'.format(library)) + continue + + if library in ALLOWLIST: + # Omit glibc/gcc/system libraries + print('Omitting {0}'.format(library)) + continue + + parent_dependencies = binary_dependencies.get(parent, []) + parent_dependencies.append(library) + binary_dependencies[parent] = parent_dependencies + + if library in binary_paths: + continue + + binary_paths[library] = library_info['path'] + binary_queue += [(n, library) for n in library_info['needed']] + + print('Copying dependencies to wheel directory') + new_libraries_path = osp.join(output_dir, 'torchvision.libs') + os.makedirs(new_libraries_path) + + new_names = {binary: binary_path} + + for library in binary_paths: + if library != binary: + library_path = binary_paths[library] + new_library_path = patch_new_path(library_path, new_libraries_path) + print('{0} -> {1}'.format(library, new_library_path)) + shutil.copyfile(library_path, new_library_path) + new_names[library] = new_library_path + + print('Updating dependency names by new files') + for library in binary_paths: + if library != binary: + if library not in binary_dependencies: + continue + library_dependencies = binary_dependencies[library] + new_library_name = new_names[library] + for dep in library_dependencies: + new_dep = osp.basename(new_names[dep]) + print('{0}: {1} -> {2}'.format(library, dep, new_dep)) + subprocess.check_output( + [ + patchelf, + '--replace-needed', + dep, + new_dep, + new_library_name + ], + cwd=new_libraries_path) + + print('Updating library rpath') + subprocess.check_output( + [ + patchelf, + '--set-rpath', + "$ORIGIN", + new_library_name + ], + cwd=new_libraries_path) + + subprocess.check_output( + [ + patchelf, + '--print-rpath', + new_library_name + ], + cwd=new_libraries_path) + + print("Update library dependencies") + library_dependencies = binary_dependencies[binary] + for dep in library_dependencies: + new_dep = osp.basename(new_names[dep]) + print('{0}: {1} -> {2}'.format(binary, dep, new_dep)) + subprocess.check_output( + [ + patchelf, + '--replace-needed', + dep, + new_dep, + binary + ], + cwd=output_library) + + print('Update library rpath') + subprocess.check_output( + [ + patchelf, + '--set-rpath', + "$ORIGIN:$ORIGIN/../torchvision.libs", + binary_path + ], + cwd=output_library + ) + + +def relocate_dll_library(dumpbin, output_dir, output_library, binary): + """ + Relocate a DLL/PE shared library to be packaged on a wheel. + + Given a shared library, find the transitive closure of its dependencies, + rename and copy them into the wheel. + """ + print('Relocating {0}'.format(binary)) + binary_path = osp.join(output_library, binary) + + library_dlls = find_dll_dependencies(dumpbin, binary_path) + binary_queue = [(dll, binary) for dll in library_dlls] + binary_paths = {binary: binary_path} + binary_dependencies = {} + + while binary_queue != []: + library, parent = binary_queue.pop(0) + if library in WINDOWS_ALLOWLIST or library.startswith('api-ms-win'): + print('Omitting {0}'.format(library)) + continue + + library_path = find_program(library) + if library_path is None: + print('{0} not found'.format(library)) + continue + + if osp.basename(osp.dirname(library_path)) == 'system32': + continue + + print('{0}: {1}'.format(library, library_path)) + parent_dependencies = binary_dependencies.get(parent, []) + parent_dependencies.append(library) + binary_dependencies[parent] = parent_dependencies + + if library in binary_paths: + continue + + binary_paths[library] = library_path + downstream_dlls = find_dll_dependencies(dumpbin, library_path) + binary_queue += [(n, library) for n in downstream_dlls] + + print('Copying dependencies to wheel directory') + package_dir = osp.join(output_dir, 'torchvision') + for library in binary_paths: + if library != binary: + library_path = binary_paths[library] + new_library_path = osp.join(package_dir, library) + print('{0} -> {1}'.format(library, new_library_path)) + shutil.copyfile(library_path, new_library_path) + + +def compress_wheel(output_dir, wheel, wheel_dir, wheel_name): + """Create RECORD file and compress wheel distribution.""" + print('Update RECORD file in wheel') + dist_info = glob.glob(osp.join(output_dir, '*.dist-info'))[0] + record_file = osp.join(dist_info, 'RECORD') + + with open(record_file, 'w') as f: + for root, _, files in os.walk(output_dir): + for this_file in files: + full_file = osp.join(root, this_file) + rel_file = osp.relpath(full_file, output_dir) + if full_file == record_file: + f.write('{0},,\n'.format(rel_file)) + else: + digest, size = rehash(full_file) + f.write('{0},{1},{2}\n'.format(rel_file, digest, size)) + + print('Compressing wheel') + base_wheel_name = osp.join(wheel_dir, wheel_name) + shutil.make_archive(base_wheel_name, 'zip', output_dir) + os.remove(wheel) + shutil.move('{0}.zip'.format(base_wheel_name), wheel) + shutil.rmtree(output_dir) + + +def patch_linux(): + # Get patchelf location + patchelf = find_program('patchelf') + if patchelf is None: + raise FileNotFoundError('Patchelf was not found in the system, please' + ' make sure that is available on the PATH.') + + # Find wheel + print('Finding wheels...') + wheels = glob.glob(osp.join(PACKAGE_ROOT, 'dist', '*.whl')) + output_dir = osp.join(PACKAGE_ROOT, 'dist', '.wheel-process') + + image_binary = 'image.so' + video_binary = 'video_reader.so' + torchvision_binaries = [image_binary, video_binary] + for wheel in wheels: + if osp.exists(output_dir): + shutil.rmtree(output_dir) + + os.makedirs(output_dir) + + print('Unzipping wheel...') + wheel_file = osp.basename(wheel) + wheel_dir = osp.dirname(wheel) + print('{0}'.format(wheel_file)) + wheel_name, _ = osp.splitext(wheel_file) + unzip_file(wheel, output_dir) + + print('Finding ELF dependencies...') + output_library = osp.join(output_dir, 'torchvision') + for binary in torchvision_binaries: + if osp.exists(osp.join(output_library, binary)): + relocate_elf_library( + patchelf, output_dir, output_library, binary) + + compress_wheel(output_dir, wheel, wheel_dir, wheel_name) + + +def patch_win(): + # Get dumpbin location + dumpbin = find_program('dumpbin') + if dumpbin is None: + raise FileNotFoundError('Dumpbin was not found in the system, please' + ' make sure that is available on the PATH.') + + # Find wheel + print('Finding wheels...') + wheels = glob.glob(osp.join(PACKAGE_ROOT, 'dist', '*.whl')) + output_dir = osp.join(PACKAGE_ROOT, 'dist', '.wheel-process') + + image_binary = 'image.pyd' + video_binary = 'video_reader.pyd' + torchvision_binaries = [image_binary, video_binary] + for wheel in wheels: + if osp.exists(output_dir): + shutil.rmtree(output_dir) + + os.makedirs(output_dir) + + print('Unzipping wheel...') + wheel_file = osp.basename(wheel) + wheel_dir = osp.dirname(wheel) + print('{0}'.format(wheel_file)) + wheel_name, _ = osp.splitext(wheel_file) + unzip_file(wheel, output_dir) + + print('Finding DLL/PE dependencies...') + output_library = osp.join(output_dir, 'torchvision') + for binary in torchvision_binaries: + if osp.exists(osp.join(output_library, binary)): + relocate_dll_library( + dumpbin, output_dir, output_library, binary) + + compress_wheel(output_dir, wheel, wheel_dir, wheel_name) + + +if __name__ == '__main__': + if sys.platform == 'linux': + patch_linux() + elif sys.platform == 'win32': + patch_win() diff --git a/packaging/windows/azure-pipelines-ci.yml b/packaging/windows/azure-pipelines-ci.yml deleted file mode 100644 index 6f9f3468cfe..00000000000 --- a/packaging/windows/azure-pipelines-ci.yml +++ /dev/null @@ -1,11 +0,0 @@ - -# Turn off auto builds for commits -trigger: none -pr: none - -jobs: -- template: templates/build_task.yml - parameters: - package: 'Wheels' - spec: 'CPU' - msagent: true diff --git a/packaging/windows/azure-pipelines.yml b/packaging/windows/azure-pipelines.yml deleted file mode 100644 index d0240570012..00000000000 --- a/packaging/windows/azure-pipelines.yml +++ /dev/null @@ -1,35 +0,0 @@ - -# Turn off auto builds for commits -trigger: none -pr: none - -jobs: -- template: templates/auth_task.yml - -- template: templates/build_task.yml - parameters: - package: 'Wheels' - spec: 'CPU' - msagent: true - -- template: templates/build_task.yml - parameters: - package: 'Conda' - spec: 'CPU' - msagent: true - -- template: templates/build_task.yml - parameters: - package: 'Wheels' - spec: 'CUDA' - msagent: true - -- template: templates/build_task.yml - parameters: - package: 'Conda' - spec: 'CUDA' - msagent: true - -- template: templates/linux_build_task.yml - parameters: - msagent: $(ms.hosted.agent.cpu) diff --git a/packaging/windows/build_vision.bat b/packaging/windows/build_vision.bat deleted file mode 100644 index 995c43905cb..00000000000 --- a/packaging/windows/build_vision.bat +++ /dev/null @@ -1,145 +0,0 @@ -@echo off - -:: This script parses args, installs required libraries (miniconda, MKL, -:: Magma), and then delegates to cpu.bat, cuda80.bat, etc. - -IF NOT "%CUDA_VERSION%" == "" IF NOT "%TORCHVISION_BUILD_VERSION%" == "" if NOT "%TORCHVISION_BUILD_NUMBER%" == "" goto env_end -if "%~1"=="" goto arg_error -if "%~2"=="" goto arg_error -if "%~3"=="" goto arg_error -if NOT "%~4"=="" goto arg_error -goto arg_end - -:arg_error - -echo Illegal number of parameters. Pass cuda version, pytorch version, build number -echo CUDA version should be Mm with no dot, e.g. '80' -echo DESIRED_PYTHON should be M.m, e.g. '2.7' -exit /b 1 - -:arg_end - -set CUDA_VERSION=%~1 -set TORCHVISION_BUILD_VERSION=%~2 -set TORCHVISION_BUILD_NUMBER=%~3 - -set BUILD_VERSION=%TORCHVISION_BUILD_VERSION% - -:env_end - -if NOT "%CUDA_VERSION%" == "cpu" ( - set CUDA_PREFIX=cuda%CUDA_VERSION% - set CUVER=cu%CUDA_VERSION% - set FORCE_CUDA=1 -) else ( - set CUDA_PREFIX=cpu - set CUVER=cpu -) - -set BUILD_VISION=1 -REM set TORCH_WHEEL=torch -f https://download.pytorch.org/whl/%CUVER%/stable.html --no-index - -IF "%DESIRED_PYTHON%" == "" set DESIRED_PYTHON=3.5;3.6;3.7 -set DESIRED_PYTHON_PREFIX=%DESIRED_PYTHON:.=% -set DESIRED_PYTHON_PREFIX=py%DESIRED_PYTHON_PREFIX:;=;py% - -set SRC_DIR=%~dp0 -pushd %SRC_DIR% - -:: Install Miniconda3 -set "CONDA_HOME=%CD%\conda" -set "tmp_conda=%CONDA_HOME%" -set "miniconda_exe=%CD%\miniconda.exe" -rmdir /s /q conda -del miniconda.exe -curl -k https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe -o "%miniconda_exe%" -call ..\conda\install_conda.bat -IF ERRORLEVEL 1 exit /b 1 -set "ORIG_PATH=%PATH%" -set "PATH=%CONDA_HOME%;%CONDA_HOME%\scripts;%CONDA_HOME%\Library\bin;%PATH%" - -:: Create a new conda environment -setlocal EnableDelayedExpansion -FOR %%v IN (%DESIRED_PYTHON%) DO ( - set PYTHON_VERSION_STR=%%v - set PYTHON_VERSION_STR=!PYTHON_VERSION_STR:.=! - conda remove -n py!PYTHON_VERSION_STR! --all -y || rmdir %CONDA_HOME%\envs\py!PYTHON_VERSION_STR! /s - conda create -n py!PYTHON_VERSION_STR! -y -q -c defaults -c conda-forge numpy>=1.11 mkl>=2018 python=%%v ca-certificates scipy av -) - -:: Uncomment for stable releases -:: FOR %%v IN (%DESIRED_PYTHON%) DO ( -:: set PYTHON_VERSION_STR=%%v -:: set PYTHON_VERSION_STR=!PYTHON_VERSION_STR:.=! -:: set "PATH=%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!;%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!\scripts;%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!\Library\bin;%ORIG_PATH%" - -:: if "%CUDA_VERSION%" == "100" ( -:: set TORCH_WHEEL=https://download.pytorch.org/whl/%CUVER%/torch-1.2.0-cp!PYTHON_VERSION_STR!-cp!PYTHON_VERSION_STR!m-win_amd64.whl -:: ) else ( -:: set TORCH_WHEEL=https://download.pytorch.org/whl/%CUVER%/torch-1.2.0%%2B%CUVER%-cp!PYTHON_VERSION_STR!-cp!PYTHON_VERSION_STR!m-win_amd64.whl -:: ) -:: echo Installing !TORCH_WHEEL!... -:: pip install "!TORCH_WHEEL!" -:: ) - -:: Uncomment for nightly releases -FOR %%v IN (%DESIRED_PYTHON%) DO ( - set PYTHON_VERSION_STR=%%v - set PYTHON_VERSION_STR=!PYTHON_VERSION_STR:.=! - set "PATH=%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!;%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!\scripts;%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!\Library\bin;%ORIG_PATH%" - - set TORCH_WHEEL=torch --pre -f https://download.pytorch.org/whl/nightly/%CUVER%/torch_nightly.html - echo Installing !TORCH_WHEEL!... - pip install !TORCH_WHEEL! -) - -endlocal - -if "%DEBUG%" == "1" ( - set BUILD_TYPE=debug -) ELSE ( - set BUILD_TYPE=release -) - -:: Install sccache -if "%USE_SCCACHE%" == "1" ( - mkdir %CD%\tmp_bin - curl -k https://s3.amazonaws.com/ossci-windows/sccache.exe --output %CD%\tmp_bin\sccache.exe - if not "%CUDA_VERSION%" == "" ( - copy %CD%\tmp_bin\sccache.exe %CD%\tmp_bin\nvcc.exe - - set CUDA_NVCC_EXECUTABLE=%CD%\tmp_bin\nvcc - set "PATH=%CD%\tmp_bin;%PATH%" - ) -) - -for %%v in (%DESIRED_PYTHON_PREFIX%) do ( - :: Activate Python Environment - set PYTHON_PREFIX=%%v - set "PATH=%CONDA_HOME%\envs\%%v;%CONDA_HOME%\envs\%%v\scripts;%CONDA_HOME%\envs\%%v\Library\bin;%ORIG_PATH%" - if defined INCLUDE ( - set "INCLUDE=%INCLUDE%;%CONDA_HOME%\envs\%%v\Library\include" - ) else ( - set "INCLUDE=%CONDA_HOME%\envs\%%v\Library\include" - ) - if defined LIB ( - set "LIB=%LIB%;%CONDA_HOME%\envs\%%v\Library\lib" - ) else ( - set "LIB=%CONDA_HOME%\envs\%%v\Library\lib" - ) - @setlocal - :: Set Flags - if NOT "%CUDA_VERSION%"=="cpu" ( - set CUDNN_VERSION=7 - ) - call %CUDA_PREFIX%.bat - IF ERRORLEVEL 1 exit /b 1 - call internal\test.bat - IF ERRORLEVEL 1 exit /b 1 - @endlocal -) - -set "PATH=%ORIG_PATH%" -popd - -IF ERRORLEVEL 1 exit /b 1 diff --git a/packaging/windows/cpu.bat b/packaging/windows/cpu.bat deleted file mode 100644 index 392a687f9dc..00000000000 --- a/packaging/windows/cpu.bat +++ /dev/null @@ -1,37 +0,0 @@ -@echo off - -IF NOT "%BUILD_VISION%" == "" ( - set MODULE_NAME=vision -) ELSE ( - set MODULE_NAME=pytorch -) - -IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( - call internal\clone.bat - cd .. - IF ERRORLEVEL 1 goto eof -) ELSE ( - call internal\clean.bat -) - -call internal\check_deps.bat -IF ERRORLEVEL 1 goto eof - -REM Check for optional components - -echo Disabling CUDA -set NO_CUDA=1 -set USE_CUDA=0 - -IF "%BUILD_VISION%" == "" ( - call internal\check_opts.bat - IF ERRORLEVEL 1 goto eof - - call internal\copy_cpu.bat - IF ERRORLEVEL 1 goto eof -) - -call internal\setup.bat -IF ERRORLEVEL 1 goto eof - -:eof diff --git a/packaging/windows/cuda101.bat b/packaging/windows/cuda101.bat deleted file mode 100644 index db397d593c8..00000000000 --- a/packaging/windows/cuda101.bat +++ /dev/null @@ -1,59 +0,0 @@ -@echo off - -IF NOT "%BUILD_VISION%" == "" ( - set MODULE_NAME=vision -) ELSE ( - set MODULE_NAME=pytorch -) - -IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( - call internal\clone.bat - cd .. - IF ERRORLEVEL 1 goto eof -) ELSE ( - call internal\clean.bat -) - -call internal\check_deps.bat -IF ERRORLEVEL 1 goto eof - -REM Check for optional components - -set NO_CUDA= -set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 - -IF "%NVTOOLSEXT_PATH%"=="" ( - echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing - exit /b 1 - goto optcheck -) - -IF "%CUDA_PATH_V10_1%"=="" ( - echo CUDA 10.1 not found, failing - exit /b 1 -) ELSE ( - IF "%BUILD_VISION%" == "" ( - set TORCH_CUDA_ARCH_LIST=3.5;5.0+PTX;6.0;6.1;7.0;7.5 - set TORCH_NVCC_FLAGS=-Xfatbin -compress-all - ) ELSE ( - set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 - ) - - set "CUDA_PATH=%CUDA_PATH_V10_1%" - set "PATH=%CUDA_PATH_V10_1%\bin;%PATH%" -) - -:optcheck - -IF "%BUILD_VISION%" == "" ( - call internal\check_opts.bat - IF ERRORLEVEL 1 goto eof - - call internal\copy.bat - IF ERRORLEVEL 1 goto eof -) - -call internal\setup.bat -IF ERRORLEVEL 1 goto eof - -:eof diff --git a/packaging/windows/cuda92.bat b/packaging/windows/cuda92.bat deleted file mode 100644 index 0bfcdc8e463..00000000000 --- a/packaging/windows/cuda92.bat +++ /dev/null @@ -1,59 +0,0 @@ -@echo off - -IF NOT "%BUILD_VISION%" == "" ( - set MODULE_NAME=vision -) ELSE ( - set MODULE_NAME=pytorch -) - -IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( - call internal\clone.bat - cd .. - IF ERRORLEVEL 1 goto eof -) ELSE ( - call internal\clean.bat -) - -call internal\check_deps.bat -IF ERRORLEVEL 1 goto eof - -REM Check for optional components - -set USE_CUDA= -set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 - -IF "%NVTOOLSEXT_PATH%"=="" ( - echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing - exit /b 1 - goto optcheck -) - -IF "%CUDA_PATH_V9_2%"=="" ( - echo CUDA 9.2 not found, failing - exit /b 1 -) ELSE ( - IF "%BUILD_VISION%" == "" ( - set TORCH_CUDA_ARCH_LIST=3.5;5.0+PTX;6.0;6.1;7.0 - set TORCH_NVCC_FLAGS=-Xfatbin -compress-all - ) ELSE ( - set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_61,code=sm_61 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50 - ) - - set "CUDA_PATH=%CUDA_PATH_V9_2%" - set "PATH=%CUDA_PATH_V9_2%\bin;%PATH%" -) - -:optcheck - -IF "%BUILD_VISION%" == "" ( - call internal\check_opts.bat - IF ERRORLEVEL 1 goto eof - - call internal\copy.bat - IF ERRORLEVEL 1 goto eof -) - -call internal\setup.bat -IF ERRORLEVEL 1 goto eof - -:eof diff --git a/packaging/windows/internal/auth.bat b/packaging/windows/internal/auth.bat deleted file mode 100644 index c874bce493c..00000000000 --- a/packaging/windows/internal/auth.bat +++ /dev/null @@ -1,46 +0,0 @@ -@echo off - -: From the following doc, the build won't be triggered if the users don't sign in daily. -: https://docs.microsoft.com/en-us/azure/devops/pipelines/build/triggers?tabs=yaml&view=vsts#my-build-didnt-run-what-happened -: To avoid this problem, we can just go through the sign in process using the following command. - -:auth_start - -if "%RETRY_TIMES%" == "" ( - set /a RETRY_TIMES=10 - set /a SLEEP_TIME=2 -) else ( - set /a RETRY_TIMES=%RETRY_TIMES%-1 - set /a SLEEP_TIME=%SLEEP_TIME%*2 -) - -for /f "usebackq tokens=*" %%i in (`curl -so NUL -w "%%{http_code}" -u %VSTS_AUTH% https://dev.azure.com/pytorch`) do ( - set STATUS_CODE=%%i -) - -IF NOT "%STATUS_CODE%" == "200" ( - echo Auth retry times remaining: %RETRY_TIMES% - echo Sleep time: %SLEEP_TIME% seconds - IF %RETRY_TIMES% EQU 0 ( - echo Auth failed - goto err - ) - waitfor SomethingThatIsNeverHappening /t %SLEEP_TIME% 2>nul || ver >nul - goto auth_start -) ELSE ( - echo Login Attempt Succeeded - goto auth_end -) - -:err - -: Throw a warning if it fails -powershell -c "Write-Warning 'Login Attempt Failed'" - -:auth_end - -set RETRY_TIMES= -set SLEEP_TIME= -set STATUS_CODE= - -exit /b 0 diff --git a/packaging/windows/internal/build_cmake.bat b/packaging/windows/internal/build_cmake.bat new file mode 100644 index 00000000000..a29160538d2 --- /dev/null +++ b/packaging/windows/internal/build_cmake.bat @@ -0,0 +1,3 @@ +@echo on +msbuild "-p:Configuration=Release" "-p:BuildInParallel=true" "-p:MultiProcessorCompilation=true" "-p:CL_MPCount=%1" torchvision.vcxproj -maxcpucount:%1 +msbuild "-p:Configuration=Release" "-p:BuildInParallel=true" "-p:MultiProcessorCompilation=true" "-p:CL_MPCount=%1" INSTALL.vcxproj -maxcpucount:%1 diff --git a/packaging/windows/internal/build_cpp_example.bat b/packaging/windows/internal/build_cpp_example.bat new file mode 100644 index 00000000000..e3f7afe9f02 --- /dev/null +++ b/packaging/windows/internal/build_cpp_example.bat @@ -0,0 +1,3 @@ +@echo on +set CL=/I"C:\Program Files (x86)\torchvision\include" +msbuild "-p:Configuration=Release" "-p:BuildInParallel=true" "-p:MultiProcessorCompilation=true" "-p:CL_MPCount=%1" hello-world.vcxproj -maxcpucount:%1 diff --git a/packaging/windows/internal/build_frcnn.bat b/packaging/windows/internal/build_frcnn.bat new file mode 100644 index 00000000000..36e3757d01c --- /dev/null +++ b/packaging/windows/internal/build_frcnn.bat @@ -0,0 +1,3 @@ +@echo on +set CL=/I"C:\Program Files (x86)\torchvision\include" +msbuild "-p:Configuration=Release" "-p:BuildInParallel=true" "-p:MultiProcessorCompilation=true" "-p:CL_MPCount=%1" test_frcnn_tracing.vcxproj -maxcpucount:%1 diff --git a/packaging/windows/internal/check_deps.bat b/packaging/windows/internal/check_deps.bat deleted file mode 100644 index a159d4436d6..00000000000 --- a/packaging/windows/internal/check_deps.bat +++ /dev/null @@ -1,67 +0,0 @@ -@echo off - -REM Check for necessary components - -IF NOT "%PROCESSOR_ARCHITECTURE%"=="AMD64" ( - echo You should use 64 bits Windows to build and run PyTorch - exit /b 1 -) - -IF "%BUILD_VISION%" == "" ( - where /q cmake.exe - - IF ERRORLEVEL 1 ( - echo CMake is required to compile PyTorch on Windows - exit /b 1 - ) -) - -IF NOT EXIST "%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" ( - echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows - exit /b 1 -) - -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do ( - if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( - set "VS15INSTALLDIR=%%i" - set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" - goto vswhere - ) -) - -:vswhere -IF "%VS15VCVARSALL%"=="" ( - echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows - exit /b 1 -) - -set MSSdk=1 -set DISTUTILS_USE_SDK=1 - -where /q python.exe - -IF ERRORLEVEL 1 ( - echo Python x64 3.5 or up is required to compile PyTorch on Windows - exit /b 1 -) - -for /F "usebackq delims=" %%i in (`python -c "import sys; print('{0[0]}{0[1]}'.format(sys.version_info))"`) do ( - set /a PYVER=%%i -) - -if %PYVER% LSS 35 ( - echo Warning: PyTorch for Python 2 under Windows is experimental. - echo Python x64 3.5 or up is recommended to compile PyTorch on Windows - echo Maybe you can create a virual environment if you have conda installed: - echo ^> conda create -n test python=3.6 pyyaml mkl numpy - echo ^> activate test -) - -for /F "usebackq delims=" %%i in (`python -c "import struct;print( 8 * struct.calcsize('P'))"`) do ( - set /a PYSIZE=%%i -) - -if %PYSIZE% NEQ 64 ( - echo Python x64 3.5 or up is required to compile PyTorch on Windows - exit /b 1 -) diff --git a/packaging/windows/internal/check_opts.bat b/packaging/windows/internal/check_opts.bat deleted file mode 100644 index 003ad921328..00000000000 --- a/packaging/windows/internal/check_opts.bat +++ /dev/null @@ -1,33 +0,0 @@ -@echo off - -REM Check for optional components - -where /q ninja.exe - -IF NOT ERRORLEVEL 1 ( - echo Ninja found, using it to speed up builds - set CMAKE_GENERATOR=Ninja -) - -where /q clcache.exe - -IF NOT ERRORLEVEL 1 ( - echo clcache found, using it to speed up builds - set CC=clcache - set CXX=clcache -) - -where /q sccache.exe - -IF NOT ERRORLEVEL 1 ( - echo sccache found, using it to speed up builds - set CC=sccache cl - set CXX=sccache cl -) - -IF exist "%MKLProductDir%\mkl\lib\intel64_win" ( - echo MKL found, adding it to build - set "LIB=%MKLProductDir%\mkl\lib\intel64_win;%MKLProductDir%\compiler\lib\intel64_win;%LIB%"; -) - -exit /b 0 diff --git a/packaging/windows/internal/clean.bat b/packaging/windows/internal/clean.bat deleted file mode 100644 index 7489640f49a..00000000000 --- a/packaging/windows/internal/clean.bat +++ /dev/null @@ -1,5 +0,0 @@ -@echo off - -cd %MODULE_NAME% -python setup.py clean -cd .. diff --git a/packaging/windows/internal/clone.bat b/packaging/windows/internal/clone.bat deleted file mode 100644 index 4ba181fa804..00000000000 --- a/packaging/windows/internal/clone.bat +++ /dev/null @@ -1,56 +0,0 @@ -@echo off - -:: The conda and wheels jobs are seperated on Windows, so we don't need to clone again. -IF "%BUILD_VISION%" == "" ( - if exist "%NIGHTLIES_PYTORCH_ROOT%" ( - xcopy /E /Y /Q "%NIGHTLIES_PYTORCH_ROOT%" pytorch\ - cd pytorch - goto submodule - ) -) - -git clone https://github.com/%PYTORCH_REPO%/%MODULE_NAME% - -cd %MODULE_NAME% - -IF NOT "%BUILD_VISION%" == "" goto latest_end - -IF "%PYTORCH_BRANCH%" == "latest" ( goto latest_start ) else ( goto latest_end ) - -:latest_start - -if "%NIGHTLIES_DATE%" == "" ( goto date_start ) else ( goto date_end ) - -:date_start - -set "DATE_CMD=Get-Date ([System.TimeZoneInfo]::ConvertTimeFromUtc((Get-Date).ToUniversalTime(), [System.TimeZoneInfo]::FindSystemTimeZoneById('Pacific Standard Time'))) -f 'yyyy_MM_dd'" -set "DATE_COMPACT_CMD=Get-Date ([System.TimeZoneInfo]::ConvertTimeFromUtc((Get-Date).ToUniversalTime(), [System.TimeZoneInfo]::FindSystemTimeZoneById('Pacific Standard Time'))) -f 'yyyyMMdd'" - -FOR /F "delims=" %%i IN ('powershell -c "%DATE_CMD%"') DO set NIGHTLIES_DATE=%%i -FOR /F "delims=" %%i IN ('powershell -c "%DATE_COMPACT_CMD%"') DO set NIGHTLIES_DATE_COMPACT=%%i - -:date_end - -if "%NIGHTLIES_DATE_COMPACT%" == "" set NIGHTLIES_DATE_COMPACT=%NIGHTLIES_DATE:~0,4%%NIGHTLIES_DATE:~5,2%%NIGHTLIES_DATE:~8,2% - -:: Switch to the latest commit by 11:59 yesterday -echo PYTORCH_BRANCH is set to latest so I will find the last commit -echo before 0:00 midnight on %NIGHTLIES_DATE% -set git_date=%NIGHTLIES_DATE:_=-% -FOR /F "delims=" %%i IN ('git log --before %git_date% -n 1 "--pretty=%%H"') DO set last_commit=%%i -echo Setting PYTORCH_BRANCH to %last_commit% since that was the last -echo commit before %NIGHTLIES_DATE% -set PYTORCH_BRANCH=%last_commit% - -:latest_end - -IF "%PYTORCH_BRANCH%" == "" ( - set PYTORCH_BRANCH=v%TORCHVISION_BUILD_VERSION% -) -git checkout %PYTORCH_BRANCH% -IF ERRORLEVEL 1 git checkout tags/%PYTORCH_BRANCH% - -:submodule - -git submodule update --init --recursive -IF ERRORLEVEL 1 exit /b 1 diff --git a/packaging/windows/internal/copy.bat b/packaging/windows/internal/copy.bat deleted file mode 100644 index b4aa397c6c1..00000000000 --- a/packaging/windows/internal/copy.bat +++ /dev/null @@ -1,13 +0,0 @@ -copy "%CUDA_PATH%\bin\cusparse64_%CUDA_VERSION%.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\cublas64_%CUDA_VERSION%.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\cudart64_%CUDA_VERSION%.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\curand64_%CUDA_VERSION%.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\cufft64_%CUDA_VERSION%.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\cufftw64_%CUDA_VERSION%.dll*" pytorch\torch\lib - -copy "%CUDA_PATH%\bin\cudnn64_%CUDNN_VERSION%.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\nvrtc64_%CUDA_VERSION%*.dll*" pytorch\torch\lib -copy "%CUDA_PATH%\bin\nvrtc-builtins64_%CUDA_VERSION%.dll*" pytorch\torch\lib - -copy "C:\Program Files\NVIDIA Corporation\NvToolsExt\bin\x64\nvToolsExt64_1.dll*" pytorch\torch\lib -copy "%CONDA_LIB_PATH%\libiomp*5md.dll" pytorch\torch\lib diff --git a/packaging/windows/internal/copy_cpu.bat b/packaging/windows/internal/copy_cpu.bat deleted file mode 100644 index f5b9d11515f..00000000000 --- a/packaging/windows/internal/copy_cpu.bat +++ /dev/null @@ -1 +0,0 @@ -copy "%CONDA_LIB_PATH%\libiomp*5md.dll" pytorch\torch\lib diff --git a/packaging/windows/internal/cuda_install.bat b/packaging/windows/internal/cuda_install.bat index cdd5a9ac206..9ca08e1cfbb 100644 --- a/packaging/windows/internal/cuda_install.bat +++ b/packaging/windows/internal/cuda_install.bat @@ -1,6 +1,6 @@ @echo on -if "%CUDA_VERSION%" == "cpu" ( +if "%CU_VERSION%" == "cpu" ( echo Skipping for CPU builds exit /b 0 ) @@ -9,14 +9,18 @@ set SRC_DIR=%~dp0\.. if not exist "%SRC_DIR%\temp_build" mkdir "%SRC_DIR%\temp_build" -set /a CUDA_VER=%CUDA_VERSION% -set CUDA_VER_MAJOR=%CUDA_VERSION:~0,-1% -set CUDA_VER_MINOR=%CUDA_VERSION:~-1,1% +set /a CUDA_VER=%CU_VERSION:cu=% +set CUDA_VER_MAJOR=%CUDA_VER:~0,-1% +set CUDA_VER_MINOR=%CUDA_VER:~-1,1% set CUDA_VERSION_STR=%CUDA_VER_MAJOR%.%CUDA_VER_MINOR% if %CUDA_VER% EQU 92 goto cuda92 if %CUDA_VER% EQU 100 goto cuda100 if %CUDA_VER% EQU 101 goto cuda101 +if %CUDA_VER% EQU 102 goto cuda102 +if %CUDA_VER% EQU 110 goto cuda110 +if %CUDA_VER% EQU 111 goto cuda111 +if %CUDA_VER% EQU 112 goto cuda112 echo CUDA %CUDA_VERSION_STR% is not supported exit /b 1 @@ -71,6 +75,78 @@ if not exist "%SRC_DIR%\temp_build\cudnn-10.1-windows10-x64-v7.6.4.38.zip" ( goto cuda_common +:cuda102 + +if not exist "%SRC_DIR%\temp_build\cuda_10.2.89_441.22_win10.exe" ( + curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_10.2.89_441.22_win10.exe --output "%SRC_DIR%\temp_build\cuda_10.2.89_441.22_win10.exe" + if errorlevel 1 exit /b 1 + set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_10.2.89_441.22_win10.exe" + set "ARGS=nvcc_10.2 cuobjdump_10.2 nvprune_10.2 cupti_10.2 cublas_10.2 cublas_dev_10.2 cudart_10.2 cufft_10.2 cufft_dev_10.2 curand_10.2 curand_dev_10.2 cusolver_10.2 cusolver_dev_10.2 cusparse_10.2 cusparse_dev_10.2 nvgraph_10.2 nvgraph_dev_10.2 npp_10.2 npp_dev_10.2 nvrtc_10.2 nvrtc_dev_10.2 nvml_dev_10.2" +) + +if not exist "%SRC_DIR%\temp_build\cudnn-10.2-windows10-x64-v7.6.5.32.zip" ( + curl -k -L https://ossci-windows.s3.amazonaws.com/cudnn-10.2-windows10-x64-v7.6.5.32.zip --output "%SRC_DIR%\temp_build\cudnn-10.2-windows10-x64-v7.6.5.32.zip" + if errorlevel 1 exit /b 1 + set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-10.2-windows10-x64-v7.6.5.32.zip" +) + +goto cuda_common + +:cuda110 + +if not exist "%SRC_DIR%\temp_build\cuda_11.0.2_451.48_win10.exe" ( + curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_11.0.2_451.48_win10.exe --output "%SRC_DIR%\temp_build\cuda_11.0.2_451.48_win10.exe" + if errorlevel 1 exit /b 1 + set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_11.0.2_451.48_win10.exe" + set "ARGS=nvcc_11.0 cuobjdump_11.0 nvprune_11.0 nvprof_11.0 cupti_11.0 cublas_11.0 cublas_dev_11.0 cudart_11.0 cufft_11.0 cufft_dev_11.0 curand_11.0 curand_dev_11.0 cusolver_11.0 cusolver_dev_11.0 cusparse_11.0 cusparse_dev_11.0 npp_11.0 npp_dev_11.0 nvrtc_11.0 nvrtc_dev_11.0 nvml_dev_11.0" +) + +if not exist "%SRC_DIR%\temp_build\cudnn-11.0-windows-x64-v8.0.4.30.zip" ( + curl -k -L https://ossci-windows.s3.amazonaws.com/cudnn-11.0-windows-x64-v8.0.4.30.zip --output "%SRC_DIR%\temp_build\cudnn-11.0-windows-x64-v8.0.4.30.zip" + if errorlevel 1 exit /b 1 + set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-11.0-windows-x64-v8.0.4.30.zip" +) + +goto cuda_common + +:cuda111 + +if not exist "%SRC_DIR%\temp_build\cuda_11.1.0_456.43_win10.exe" ( + curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_11.1.0_456.43_win10.exe --output "%SRC_DIR%\temp_build\cuda_11.1.0_456.43_win10.exe" + if errorlevel 1 exit /b 1 + set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_11.1.0_456.43_win10.exe" + set "ARGS=nvcc_11.1 cuobjdump_11.1 nvprune_11.1 nvprof_11.1 cupti_11.1 cublas_11.1 cublas_dev_11.1 cudart_11.1 cufft_11.1 cufft_dev_11.1 curand_11.1 curand_dev_11.1 cusolver_11.1 cusolver_dev_11.1 cusparse_11.1 cusparse_dev_11.1 npp_11.1 npp_dev_11.1 nvrtc_11.1 nvrtc_dev_11.1 nvml_dev_11.1" +) + +@REM There is no downloadable driver for Tesla on CUDA 11.1 yet. We will use +@REM the driver inside CUDA +if "%JOB_EXECUTOR%" == "windows-with-nvidia-gpu" set "ARGS=%ARGS% Display.Driver" + +if not exist "%SRC_DIR%\temp_build\cudnn-11.1-windows-x64-v8.0.5.39.zip" ( + curl -k -L https://ossci-windows.s3.amazonaws.com/cudnn-11.1-windows-x64-v8.0.5.39.zip --output "%SRC_DIR%\temp_build\cudnn-11.1-windows-x64-v8.0.5.39.zip" + if errorlevel 1 exit /b 1 + set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-11.1-windows-x64-v8.0.5.39.zip" +) + +goto cuda_common + +:cuda112 + +if not exist "%SRC_DIR%\temp_build\cuda_11.2.0_460.89_win10.exe" ( + curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_11.2.0_460.89_win10.exe --output "%SRC_DIR%\temp_build\cuda_11.2.0_460.89_win10.exe" + if errorlevel 1 exit /b 1 + set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_11.2.0_460.89_win10.exe" + set "ARGS=nvcc_11.2 cuobjdump_11.2 nvprune_11.2 nvprof_11.2 cupti_11.2 cublas_11.2 cublas_dev_11.2 cudart_11.2 cufft_11.2 cufft_dev_11.2 curand_11.2 curand_dev_11.2 cusolver_11.2 cusolver_dev_11.2 cusparse_11.2 cusparse_dev_11.2 npp_11.2 npp_dev_11.2 nvrtc_11.2 nvrtc_dev_11.2 nvml_dev_11.2" +) + +if not exist "%SRC_DIR%\temp_build\cudnn-11.2-windows-x64-v8.1.0.77.zip" ( + curl -k -L http://s3.amazonaws.com/ossci-windows/cudnn-11.2-windows-x64-v8.1.0.77.zip --output "%SRC_DIR%\temp_build\cudnn-11.2-windows-x64-v8.1.0.77.zip" + if errorlevel 1 exit /b 1 + set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-11.2-windows-x64-v8.1.0.77.zip" +) + +goto cuda_common + :cuda_common if not exist "%SRC_DIR%\temp_build\NvToolsExt.7z" ( @@ -78,6 +154,11 @@ if not exist "%SRC_DIR%\temp_build\NvToolsExt.7z" ( if errorlevel 1 exit /b 1 ) +if not exist "%SRC_DIR%\temp_build\gpu_driver_dlls.7z" ( + curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "%SRC_DIR%\temp_build\gpu_driver_dlls.zip" + if errorlevel 1 exit /b 1 +) + echo Installing CUDA toolkit... 7z x %CUDA_SETUP_FILE% -o"%SRC_DIR%\temp_build\cuda" pushd "%SRC_DIR%\temp_build\cuda" @@ -113,5 +194,8 @@ xcopy /Y "%SRC_DIR%\temp_build\cudnn\cuda\bin\*.*" "%ProgramFiles%\NVIDIA GPU Co xcopy /Y "%SRC_DIR%\temp_build\cudnn\cuda\lib\x64\*.*" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\lib\x64" xcopy /Y "%SRC_DIR%\temp_build\cudnn\cuda\include\*.*" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\include" +echo Installing GPU driver DLLs +7z x %SRC_DIR%\temp_build\gpu_driver_dlls.zip -o"C:\Windows\System32" + echo Cleaning temp files rd /s /q "%SRC_DIR%\temp_build" || ver > nul diff --git a/packaging/windows/internal/dep_install.bat b/packaging/windows/internal/dep_install.bat deleted file mode 100644 index db665a99f26..00000000000 --- a/packaging/windows/internal/dep_install.bat +++ /dev/null @@ -1,14 +0,0 @@ -@echo off - -REM curl -k https://www.7-zip.org/a/7z1805-x64.exe -O -REM if errorlevel 1 exit /b 1 - -REM start /wait 7z1805-x64.exe /S -REM if errorlevel 1 exit /b 1 - -REM set "PATH=%ProgramFiles%\7-Zip;%PATH%" - -choco feature disable --name showDownloadProgress -choco feature enable --name allowGlobalConfirmation - -choco install curl 7zip diff --git a/packaging/windows/internal/env_fix.bat b/packaging/windows/internal/env_fix.bat deleted file mode 100644 index dd0aaf5f2d5..00000000000 --- a/packaging/windows/internal/env_fix.bat +++ /dev/null @@ -1,31 +0,0 @@ -@echo off - -:: Caution: Please don't use this script locally -:: It may destroy your build environment. - -setlocal - -IF NOT EXIST "%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" ( - echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows - exit /b 1 -) - -for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do ( - if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( - set "VS15INSTALLDIR=%%i" - set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" - goto vswhere - ) -) - -:vswhere - -IF "%VS15VCVARSALL%"=="" ( - echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows - exit /b 1 -) - -call "%VS15VCVARSALL%" x86_amd64 -for /f "usebackq tokens=*" %%i in (`where link.exe`) do move "%%i" "%%i.bak" - -endlocal diff --git a/packaging/windows/internal/nightly_defaults.bat b/packaging/windows/internal/nightly_defaults.bat deleted file mode 100644 index 1bba23209b1..00000000000 --- a/packaging/windows/internal/nightly_defaults.bat +++ /dev/null @@ -1,200 +0,0 @@ -@echo on - -if "%~1"=="" goto arg_error -if NOT "%~2"=="" goto arg_error -goto arg_end - -:arg_error - -echo Illegal number of parameters. Pass packge type `Conda` or `Wheels`. -exit /b 1 - -:arg_end - -echo "nightly_defaults.bat at %CD% starting at %DATE%" - -set SRC_DIR=%~dp0\.. - -:: NIGHTLIES_FOLDER -:: N.B. this is also defined in cron_start.sh -:: An arbitrary root folder to store all nightlies folders, each of which is a -:: parent level date folder with separate subdirs for logs, wheels, conda -:: packages, etc. This should be kept the same across all scripts called in a -:: cron job, so it only has a default value in the top-most script -:: build_cron.sh to avoid the default values from diverging. -if "%NIGHTLIES_FOLDER%" == "" set "NIGHTLIES_FOLDER=%SRC_DIR%" - -:: NIGHTLIES_DATE -:: N.B. this is also defined in cron_start.sh -:: The date in YYYY_mm_dd format that we are building for. If this is not -:: already set, then this will first try to find the date of the nightlies -:: folder that this builder repo exists in; e.g. if this script exists in -:: some_dir/2019_09_04/builder/cron/ then this will be set to 2019_09_04 (must -:: match YYYY_mm_dd). This is for convenience when debugging/uploading past -:: dates, so that you don't have to set NIGHTLIES_DATE yourself. If a date -:: folder cannot be found in that exact location, then this will default to -:: the current date. - - -if "%NIGHTLIES_DATE%" == "" ( goto date_start ) else ( goto date_end ) - -:date_start - -set "DATE_CMD=Get-Date ([System.TimeZoneInfo]::ConvertTimeFromUtc((Get-Date).ToUniversalTime(), [System.TimeZoneInfo]::FindSystemTimeZoneById('Pacific Standard Time'))) -f 'yyyy_MM_dd'" -set "DATE_COMPACT_CMD=Get-Date ([System.TimeZoneInfo]::ConvertTimeFromUtc((Get-Date).ToUniversalTime(), [System.TimeZoneInfo]::FindSystemTimeZoneById('Pacific Standard Time'))) -f 'yyyyMMdd'" - -FOR /F "delims=" %%i IN ('powershell -c "%DATE_CMD%"') DO set NIGHTLIES_DATE=%%i -FOR /F "delims=" %%i IN ('powershell -c "%DATE_COMPACT_CMD%"') DO set NIGHTLIES_DATE_COMPACT=%%i - -:date_end - -if "%NIGHTLIES_DATE_COMPACT%" == "" set NIGHTLIES_DATE_COMPACT=%NIGHTLIES_DATE:~0,4%%NIGHTLIES_DATE:~5,2%%NIGHTLIES_DATE:~8,2% - -:: Used in lots of places as the root dir to store all conda/wheel/manywheel -:: packages as well as logs for the day -set today=%NIGHTLIES_FOLDER%\%NIGHTLIES_DATE% -mkdir "%today%" || ver >nul - - -::############################################################################# -:: Add new configuration variables below this line. 'today' should always be -:: defined ASAP to avoid weird errors -::############################################################################# - - -:: List of people to email when things go wrong. This is passed directly to -:: `mail -t` -:: TODO: Not supported yet -if "%NIGHTLIES_EMAIL_LIST%" == "" set NIGHTLIES_EMAIL_LIST=peterghost86@gmail.com - -:: PYTORCH_CREDENTIALS_FILE -:: A bash file that exports credentials needed to upload to aws and anaconda. -:: Needed variables are PYTORCH_ANACONDA_USERNAME, PYTORCH_ANACONDA_PASSWORD, -:: AWS_ACCESS_KEY_ID, and AWS_SECRET_ACCESS_KEY. Or it can just export the AWS -:: keys and then prepend a logged-in conda installation to the path. -:: TODO: Not supported yet -if "%PYTORCH_CREDENTIALS_FILE%" == "" set PYTORCH_CREDENTIALS_FILE=/c/Users/administrator/nightlies/credentials.sh - -:: Location of the temporary miniconda that is downloaded to install conda-build -:: and aws to upload finished packages TODO this is messy to install this in -:: upload.sh and later use it in upload_logs.sh -if "%CONDA_UPLOADER_INSTALLATION%" == "" set "CONDA_UPLOADER_INSTALLATION=%today%\miniconda" - -:: N.B. BUILDER_REPO and BUILDER_BRANCH are both set in cron_start.sh, as that -:: is the script that actually clones the builder repo that /this/ script is -:: running from. -pushd "%SRC_DIR%\.." -set NIGHTLIES_BUILDER_ROOT=%CD% -popd - -:: The shared pytorch repo to be used by all builds -if "%NIGHTLIES_PYTORCH_ROOT%" == "" set "NIGHTLIES_PYTORCH_ROOT=%today%\vision" - -:: PYTORCH_REPO -:: The Github org/user whose fork of Pytorch to check out (git clone -:: https://github.com//pytorch.git). This will always be cloned -:: fresh to build with. Default is 'pytorch' -if "%PYTORCH_REPO%" == "" set PYTORCH_REPO=pytorch - -:: PYTORCH_BRANCH -:: The branch of Pytorch to checkout for building (git checkout ). -:: This can either be the name of the branch (e.g. git checkout -:: my_branch_name) or can be a git commit (git checkout 4b2674n...). Default -:: is 'latest', which is a special term that signals to pull the last commit -:: before 0:00 midnight on the NIGHTLIES_DATE -if "%PYTORCH_BRANCH%" == "" set PYTORCH_BRANCH=latest - -:: Clone the requested pytorch checkout -if exist "%NIGHTLIES_PYTORCH_ROOT%" ( goto clone_end ) else ( goto clone_start ) - -:clone_start - -git clone --recursive "https://github.com/%PYTORCH_REPO%/vision.git" "%NIGHTLIES_PYTORCH_ROOT%" -pushd "%NIGHTLIES_PYTORCH_ROOT%" - -if "%PYTORCH_BRANCH%" == "latest" ( goto latest_start ) else ( goto latest_end ) - -:latest_start - -:: Switch to the latest commit by 11:59 yesterday -echo PYTORCH_BRANCH is set to latest so I will find the last commit -echo before 0:00 midnight on %NIGHTLIES_DATE% -set git_date=%NIGHTLIES_DATE:_=-% -FOR /F "delims=" %%i IN ('git log --before %git_date% -n 1 "--pretty=%%H"') DO set last_commit=%%i -echo Setting PYTORCH_BRANCH to %last_commit% since that was the last -echo commit before %NIGHTLIES_DATE% -set PYTORCH_BRANCH=%last_commit% - -:latest_end - -git checkout "%PYTORCH_BRANCH%" -git submodule update -popd - -:clone_end - -if "%CUDA_VERSION%" == "cpu" ( - set _DESIRED_CUDA=cpu -) else ( - set _DESIRED_CUDA=cu%CUDA_VERSION% -) - -:: PYTORCH_BUILD_VERSION -:: The actual version string. Used in conda like -:: pytorch-nightly==1.0.0.dev20180908 -:: or in manylinux like -:: torch_nightly-1.0.0.dev20180908-cp27-cp27m-linux_x86_64.whl -if "%TORCHVISION_BUILD_VERSION%" == "" set TORCHVISION_BUILD_VERSION=0.5.0.dev%NIGHTLIES_DATE_COMPACT% - -if "%~1" == "Wheels" ( - if not "%CUDA_VERSION%" == "101" ( - set TORCHVISION_BUILD_VERSION=%TORCHVISION_BUILD_VERSION%+%_DESIRED_CUDA% - ) -) - -:: PYTORCH_BUILD_NUMBER -:: This is usually the number 1. If more than one build is uploaded for the -:: same version/date, then this can be incremented to 2,3 etc in which case -:: '.post2' will be appended to the version string of the package. This can -:: be set to '0' only if OVERRIDE_PACKAGE_VERSION is being used to bypass -:: all the version string logic in downstream scripts. Since we use the -:: override below, exporting this shouldn't actually matter. -if "%TORCHVISION_BUILD_NUMBER%" == "" set /a TORCHVISION_BUILD_NUMBER=1 -if %TORCHVISION_BUILD_NUMBER% GTR 1 set TORCHVISION_BUILD_VERSION=%TORCHVISION_BUILD_VERSION%%TORCHVISION_BUILD_NUMBER% - -:: The nightly builds use their own versioning logic, so we override whatever -:: logic is in setup.py or other scripts -:: TODO: Not supported yet -set OVERRIDE_PACKAGE_VERSION=%TORCHVISION_BUILD_VERSION% -set BUILD_VERSION=%TORCHVISION_BUILD_VERSION% - -:: Build folder for conda builds to use -if "%TORCH_CONDA_BUILD_FOLDER%" == "" set TORCH_CONDA_BUILD_FOLDER=torchvision - -:: TORCH_PACKAGE_NAME -:: The name of the package to upload. This should probably be pytorch or -:: pytorch-nightly. N.B. that pip will change all '-' to '_' but conda will -:: not. This is dealt with in downstream scripts. -:: TODO: Not supported yet -if "%TORCH_PACKAGE_NAME%" == "" set TORCH_PACKAGE_NAME=torchvision - -:: PIP_UPLOAD_FOLDER should end in a slash. This is to handle it being empty -:: (when uploading to e.g. whl/cpu/) and also to handle nightlies (when -:: uploading to e.g. /whl/nightly/cpu) -:: TODO: Not supported yet -if "%PIP_UPLOAD_FOLDER%" == "" set "PIP_UPLOAD_FOLDER=nightly\" - -:: The location of the binary_sizes dir in s3 is hardcoded into -:: upload_binary_sizes.sh - -:: DAYS_TO_KEEP -:: How many days to keep around for clean.sh. Build folders older than this -:: will be purged at the end of cron jobs. '1' means to keep only the current -:: day. Values less than 1 are not allowed. The default is 5. -:: TODO: Not supported yet -if "%DAYS_TO_KEEP%" == "" set /a DAYS_TO_KEEP=5 -if %DAYS_TO_KEEP% LSS 1 ( - echo DAYS_TO_KEEP cannot be less than 1. - echo A value of 1 means to only keep the build for today - exit /b 1 -) diff --git a/packaging/windows/internal/publish.bat b/packaging/windows/internal/publish.bat deleted file mode 100644 index 7f118bbb6e3..00000000000 --- a/packaging/windows/internal/publish.bat +++ /dev/null @@ -1,89 +0,0 @@ -@echo off - -set SRC_DIR=%~dp0 -pushd %SRC_DIR% - -if NOT "%CUDA_VERSION%" == "cpu" ( - set PACKAGE_SUFFIX=_cuda%CUDA_VERSION% -) else ( - set PACKAGE_SUFFIX= -) - -if "%PACKAGEFULLNAME%" == "Conda" ( - set PACKAGE=conda -) else ( - set PACKAGE=wheels -) - -if not defined PACKAGE_SUFFIX ( - set PUBLISH_BRANCH=vision_%PACKAGE%_%DESIRED_PYTHON% -) else ( - set PUBLISH_BRANCH=vision_%PACKAGE%_%DESIRED_PYTHON%%PACKAGE_SUFFIX% -) - -git clone %ARTIFACT_REPO_URL% -b %PUBLISH_BRANCH% --single-branch >nul 2>&1 - -IF ERRORLEVEL 1 ( - echo Branch %PUBLISH_BRANCH% not exist, falling back to master - set NO_BRANCH=1 - git clone %ARTIFACT_REPO_URL% -b master --single-branch >nul 2>&1 -) - -IF ERRORLEVEL 1 ( - echo Clone failed - goto err -) - -cd pytorch_builder -attrib -s -h -r . /s /d - -:: Empty repo -rd /s /q . || ver >nul - -IF NOT EXIST %PACKAGE% mkdir %PACKAGE% - -xcopy /S /E /Y ..\..\output\*.* %PACKAGE%\ - -git config --global user.name "Azure DevOps" -git config --global user.email peterghost86@gmail.com -git init -git checkout --orphan %PUBLISH_BRANCH% -git remote add origin %ARTIFACT_REPO_URL% -git add . -git commit -m "Update artifacts" - -:push - -if "%RETRY_TIMES%" == "" ( - set /a RETRY_TIMES=10 - set /a SLEEP_TIME=2 -) else ( - set /a RETRY_TIMES=%RETRY_TIMES%-1 - set /a SLEEP_TIME=%SLEEP_TIME%*2 -) - -git push origin %PUBLISH_BRANCH% -f > nul 2>&1 - -IF ERRORLEVEL 1 ( - echo Git push retry times remaining: %RETRY_TIMES% - echo Sleep time: %SLEEP_TIME% seconds - IF %RETRY_TIMES% EQU 0 ( - echo Push failed - goto err - ) - waitfor SomethingThatIsNeverHappening /t %SLEEP_TIME% 2>nul || ver >nul - goto push -) ELSE ( - set RETRY_TIMES= - set SLEEP_TIME= -) - -popd - -exit /b 0 - -:err - -popd - -exit /b 1 diff --git a/packaging/windows/internal/setup.bat b/packaging/windows/internal/setup.bat deleted file mode 100644 index d18dfb35023..00000000000 --- a/packaging/windows/internal/setup.bat +++ /dev/null @@ -1,44 +0,0 @@ -@echo off - -echo The flags after configuring: -echo NO_CUDA=%NO_CUDA% -echo CMAKE_GENERATOR=%CMAKE_GENERATOR% -if "%NO_CUDA%"=="" echo CUDA_PATH=%CUDA_PATH% -if NOT "%CC%"=="" echo CC=%CC% -if NOT "%CXX%"=="" echo CXX=%CXX% -if NOT "%DISTUTILS_USE_SDK%"=="" echo DISTUTILS_USE_SDK=%DISTUTILS_USE_SDK% - -set SRC_DIR=%~dp0\.. - -IF "%VSDEVCMD_ARGS%" == "" ( - call "%VS15VCVARSALL%" x64 -) ELSE ( - call "%VS15VCVARSALL%" x64 %VSDEVCMD_ARGS% -) - -pushd %SRC_DIR% - -IF NOT exist "setup.py" ( - cd %MODULE_NAME% -) - -if "%CXX%"=="sccache cl" ( - sccache --stop-server - sccache --start-server - sccache --zero-stats -) - -:pytorch -:: This stores in e.g. D:/_work/1/s/windows/output/cpu -pip wheel -e . --no-deps --wheel-dir ../output/%CUDA_PREFIX% - -:build_end -IF ERRORLEVEL 1 exit /b 1 -IF NOT ERRORLEVEL 0 exit /b 1 - -if "%CXX%"=="sccache cl" ( - taskkill /im sccache.exe /f /t || ver > nul - taskkill /im nvcc.exe /f /t || ver > nul -) - -cd .. diff --git a/packaging/windows/internal/test.bat b/packaging/windows/internal/test.bat deleted file mode 100644 index a87fc1a2858..00000000000 --- a/packaging/windows/internal/test.bat +++ /dev/null @@ -1,79 +0,0 @@ -@echo off - -set SRC_DIR=%~dp0\.. -pushd %SRC_DIR% - -set PYTHON_VERSION=%PYTHON_PREFIX:py=cp% - -if "%BUILD_VISION%" == "" ( - pip install future pytest coverage hypothesis protobuf -) ELSE ( - pip install future pytest "pillow>=4.1.1" mock -) - -for /F "delims=" %%i in ('where /R %SRC_DIR%\output\%CUDA_PREFIX% *%MODULE_NAME%*%PYTHON_VERSION%*.whl') do pip install "%%i" - -if ERRORLEVEL 1 exit /b 1 - -if NOT "%BUILD_VISION%" == "" ( - echo Smoke testing imports - python -c "import torchvision" - if ERRORLEVEL 1 exit /b 1 - goto smoke_test_end -) - -echo Smoke testing imports -python -c "import torch" -if ERRORLEVEL 1 exit /b 1 - -python -c "from caffe2.python import core" -if ERRORLEVEL 1 exit /b 1 - -echo Checking that MKL is available -python -c "import torch; exit(0 if torch.backends.mkl.is_available() else 1)" -if ERRORLEVEL 1 exit /b 1 - -setlocal EnableDelayedExpansion -set NVIDIA_GPU_EXISTS=0 -for /F "delims=" %%i in ('wmic path win32_VideoController get name') do ( - set GPUS=%%i - if not "x!GPUS:NVIDIA=!" == "x!GPUS!" ( - SET NVIDIA_GPU_EXISTS=1 - goto gpu_check_end - ) -) -:gpu_check_end -endlocal & set NVIDIA_GPU_EXISTS=%NVIDIA_GPU_EXISTS% - -if NOT "%CUDA_PREFIX%" == "cpu" if "%NVIDIA_GPU_EXISTS%" == "1" ( - echo Checking that CUDA archs are setup correctly - python -c "import torch; torch.randn([3,5]).cuda()" - if ERRORLEVEL 1 exit /b 1 - - echo Checking that magma is available - python -c "import torch; torch.rand(1).cuda(); exit(0 if torch.cuda.has_magma else 1)" - if ERRORLEVEL 1 exit /b 1 - - echo Checking that CuDNN is available - python -c "import torch; exit(0 if torch.backends.cudnn.is_available() else 1)" - if ERRORLEVEL 1 exit /b 1 -) -:smoke_test_end - -echo Not running unit tests. Hopefully these problems are caught by CI -goto test_end - -if "%BUILD_VISION%" == "" ( - cd pytorch\test - python run_test.py -v -) else ( - cd vision - pytest . -) - -if ERRORLEVEL 1 exit /b 1 - -:test_end - -popd -exit /b 0 diff --git a/packaging/windows/internal/upload.bat b/packaging/windows/internal/upload.bat deleted file mode 100644 index a23391a2935..00000000000 --- a/packaging/windows/internal/upload.bat +++ /dev/null @@ -1,96 +0,0 @@ -@echo off - -IF "%CONDA_UPLOADER_INSTALLATION%" == "" goto precheck_fail -IF "%PYTORCH_FINAL_PACKAGE_DIR%" == "" goto precheck_fail -IF "%today%" == "" goto precheck_fail -IF "%PYTORCH_ANACONDA_USERNAME%" == "" goto precheck_fail -IF "%PYTORCH_ANACONDA_PASSWORD%" == "" goto precheck_fail - -goto precheck_pass - -:precheck_fail - -echo Please run nightly_defaults.bat first. -echo And remember to set `PYTORCH_FINAL_PACKAGE_DIR` -echo Finally, don't forget to set anaconda tokens -exit /b 1 - -:precheck_pass - -pushd %today% - -:: Install anaconda client -set "CONDA_HOME=%CONDA_UPLOADER_INSTALLATION%" -set "tmp_conda=%CONDA_HOME%" -set "miniconda_exe=%CD%\miniconda.exe" -rmdir /s /q "%CONDA_HOME%" -del miniconda.exe -curl -k https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe -o "%miniconda_exe%" -popd - -IF ERRORLEVEL 1 ( - echo Conda download failed - exit /b 1 -) - -call %~dp0\..\..\conda\install_conda.bat - -IF ERRORLEVEL 1 ( - echo Conda installation failed - exit /b 1 -) - -set "ORIG_PATH=%PATH%" -set "PATH=%CONDA_HOME%;%CONDA_HOME%\scripts;%CONDA_HOME%\Library\bin;%PATH%" - -REM conda install -y anaconda-client -pip install git+https://github.com/peterjc123/anaconda-client.git@log_more_meaningfull_errors -IF ERRORLEVEL 1 ( - echo Anaconda client installation failed - exit /b 1 -) - -set PYTORCH_FINAL_PACKAGE= -:: Upload all the packages under `PYTORCH_FINAL_PACKAGE_DIR` -FOR /F "delims=" %%i IN ('where /R %PYTORCH_FINAL_PACKAGE_DIR% *vision*.tar.bz2') DO ( - set "PYTORCH_FINAL_PACKAGE=%%i" -) - -IF "%PYTORCH_FINAL_PACKAGE%" == "" ( - echo No package to upload - exit /b 0 -) - -:upload - -if "%RETRY_TIMES%" == "" ( - set /a RETRY_TIMES=10 - set /a SLEEP_TIME=2 -) else ( - set /a RETRY_TIMES=%RETRY_TIMES%-1 - set /a SLEEP_TIME=%SLEEP_TIME%*2 -) - -REM bash -c "yes | anaconda login --username "%PYTORCH_ANACONDA_USERNAME%" --password "%PYTORCH_ANACONDA_PASSWORD%"" -anaconda login --username "%PYTORCH_ANACONDA_USERNAME%" --password "%PYTORCH_ANACONDA_PASSWORD%" -IF ERRORLEVEL 1 ( - echo Anaconda client login failed - exit /b 1 -) - -echo Uploading %PYTORCH_FINAL_PACKAGE% to Anaconda Cloud -anaconda upload "%PYTORCH_FINAL_PACKAGE%" -u pytorch-nightly --label main --force --no-progress - -IF ERRORLEVEL 1 ( - echo Anaconda upload retry times remaining: %RETRY_TIMES% - echo Sleep time: %SLEEP_TIME% seconds - IF %RETRY_TIMES% EQU 0 ( - echo Upload failed - exit /b 1 - ) - waitfor SomethingThatIsNeverHappening /t %SLEEP_TIME% 2>nul || ver >nul - goto upload -) ELSE ( - set RETRY_TIMES= - set SLEEP_TIME= -) diff --git a/packaging/windows/internal/vc_env_helper.bat b/packaging/windows/internal/vc_env_helper.bat new file mode 100644 index 00000000000..e85a372f93d --- /dev/null +++ b/packaging/windows/internal/vc_env_helper.bat @@ -0,0 +1,43 @@ +@echo on + +set VC_VERSION_LOWER=16 +set VC_VERSION_UPPER=17 +if "%VC_YEAR%" == "2017" ( + set VC_VERSION_LOWER=15 + set VC_VERSION_UPPER=16 +) + +for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do ( + if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( + set "VS15INSTALLDIR=%%i" + set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" + goto vswhere + ) +) + +:vswhere +if "%VSDEVCMD_ARGS%" == "" ( + call "%VS15VCVARSALL%" x64 || exit /b 1 +) else ( + call "%VS15VCVARSALL%" x64 %VSDEVCMD_ARGS% || exit /b 1 +) + +@echo on + +set DISTUTILS_USE_SDK=1 + +set args=%1 +shift +:start +if [%1] == [] goto done +set args=%args% %1 +shift +goto start + +:done +if "%args%" == "" ( + echo Usage: vc_env_helper.bat [command] [args] + echo e.g. vc_env_helper.bat cl /c test.cpp +) + +%args% || exit /b 1 diff --git a/packaging/windows/internal/vc_install_helper.sh b/packaging/windows/internal/vc_install_helper.sh new file mode 100644 index 00000000000..cdae18065b9 --- /dev/null +++ b/packaging/windows/internal/vc_install_helper.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +set -ex + +if [[ "$CU_VERSION" == "cu92" ]]; then + export VC_YEAR=2017 + export VSDEVCMD_ARGS="-vcvars_ver=14.13" + powershell packaging/windows/internal/vs2017_install.ps1 +elif [[ "$CU_VERSION" == "cu100" ]]; then + export VC_YEAR=2017 + export VSDEVCMD_ARGS="" + powershell packaging/windows/internal/vs2017_install.ps1 +else + export VC_YEAR=2019 + export VSDEVCMD_ARGS="" +fi diff --git a/packaging/windows/internal/vs2017_install.ps1 b/packaging/windows/internal/vs2017_install.ps1 new file mode 100644 index 00000000000..3e953de1ab7 --- /dev/null +++ b/packaging/windows/internal/vs2017_install.ps1 @@ -0,0 +1,25 @@ +$VS_DOWNLOAD_LINK = "https://aka.ms/vs/15/release/vs_buildtools.exe" +$VS_INSTALL_ARGS = @("--nocache","--quiet","--wait", "--add Microsoft.VisualStudio.Workload.VCTools", + "--add Microsoft.VisualStudio.Component.VC.Tools.14.13", + "--add Microsoft.Component.MSBuild", + "--add Microsoft.VisualStudio.Component.Roslyn.Compiler", + "--add Microsoft.VisualStudio.Component.TextTemplating", + "--add Microsoft.VisualStudio.Component.VC.CoreIde", + "--add Microsoft.VisualStudio.Component.VC.Redist.14.Latest", + "--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Core", + "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64", + "--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Win81") + +curl.exe --retry 3 -kL $VS_DOWNLOAD_LINK --output vs_installer.exe +if ($LASTEXITCODE -ne 0) { + echo "Download of the VS 2017 installer failed" + exit 1 +} + +$process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VS_INSTALL_ARGS -NoNewWindow -Wait -PassThru +Remove-Item -Path vs_installer.exe -Force +$exitCode = $process.ExitCode +if (($exitCode -ne 0) -and ($exitCode -ne 3010)) { + echo "VS 2017 installer exited with code $exitCode, which should be one of [0, 3010]." + exit 1 +} diff --git a/packaging/windows/internal/vs2019_install.ps1 b/packaging/windows/internal/vs2019_install.ps1 new file mode 100644 index 00000000000..e436051f0db --- /dev/null +++ b/packaging/windows/internal/vs2019_install.ps1 @@ -0,0 +1,21 @@ +$VS_DOWNLOAD_LINK = "https://aka.ms/vs/16/release/vs_buildtools.exe" +$VS_INSTALL_ARGS = @("--nocache","--quiet","--wait", "--add Microsoft.VisualStudio.Workload.VCTools", + "--add Microsoft.Component.MSBuild", + "--add Microsoft.VisualStudio.Component.Roslyn.Compiler", + "--add Microsoft.VisualStudio.Component.VC.CoreBuildTools", + "--add Microsoft.VisualStudio.Component.VC.Redist.14.Latest", + "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64") + +curl.exe --retry 3 -kL $VS_DOWNLOAD_LINK --output vs_installer.exe +if ($LASTEXITCODE -ne 0) { + echo "Download of the VS 2019 installer failed" + exit 1 +} + +$process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VS_INSTALL_ARGS -NoNewWindow -Wait -PassThru +Remove-Item -Path vs_installer.exe -Force +$exitCode = $process.ExitCode +if (($exitCode -ne 0) -and ($exitCode -ne 3010)) { + echo "VS 2019 installer exited with code $exitCode, which should be one of [0, 3010]." + exit 1 +} diff --git a/packaging/windows/internal/vs_install.bat b/packaging/windows/internal/vs_install.bat deleted file mode 100644 index e6589092372..00000000000 --- a/packaging/windows/internal/vs_install.bat +++ /dev/null @@ -1,28 +0,0 @@ -@echo off - -set VS_DOWNLOAD_LINK=https://aka.ms/vs/15/release/vs_buildtools.exe -REM IF "%VS_LATEST%" == "1" ( -REM set VS_INSTALL_ARGS= --nocache --norestart --quiet --wait --add Microsoft.VisualStudio.Workload.VCTools -REM set VSDEVCMD_ARGS= -REM ) ELSE ( -set VS_INSTALL_ARGS=--nocache --quiet --wait --add Microsoft.VisualStudio.Workload.VCTools ^ - --add Microsoft.VisualStudio.Component.VC.Tools.14.11 ^ - --add Microsoft.Component.MSBuild ^ - --add Microsoft.VisualStudio.Component.Roslyn.Compiler ^ - --add Microsoft.VisualStudio.Component.TextTemplating ^ - --add Microsoft.VisualStudio.Component.VC.CoreIde ^ - --add Microsoft.VisualStudio.Component.VC.Redist.14.Latest ^ - --add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Core ^ - --add Microsoft.VisualStudio.Component.VC.Tools.x86.x64 ^ - --add Microsoft.VisualStudio.Component.VC.Tools.14.11 ^ - --add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Win81 -set VSDEVCMD_ARGS=-vcvars_ver=14.11 -REM ) - -curl -k -L %VS_DOWNLOAD_LINK% --output vs_installer.exe -if errorlevel 1 exit /b 1 - -start /wait .\vs_installer.exe %VS_INSTALL_ARGS% -if not errorlevel 0 exit /b 1 -if errorlevel 1 if not errorlevel 3010 exit /b 1 -if errorlevel 3011 exit /b 1 diff --git a/packaging/windows/old/cuda100.bat b/packaging/windows/old/cuda100.bat deleted file mode 100644 index ac9be3c6907..00000000000 --- a/packaging/windows/old/cuda100.bat +++ /dev/null @@ -1,59 +0,0 @@ -@echo off - -IF NOT "%BUILD_VISION%" == "" ( - set MODULE_NAME=vision -) ELSE ( - set MODULE_NAME=pytorch -) - -IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( - call internal\clone.bat - cd .. - IF ERRORLEVEL 1 goto eof -) ELSE ( - call internal\clean.bat -) - -call internal\check_deps.bat -IF ERRORLEVEL 1 goto eof - -REM Check for optional components - -set NO_CUDA= -set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 - -IF "%NVTOOLSEXT_PATH%"=="" ( - echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing - exit /b 1 - goto optcheck -) - -IF "%CUDA_PATH_V10_0%"=="" ( - echo CUDA 10.0 not found, failing - exit /b 1 -) ELSE ( - IF "%BUILD_VISION%" == "" ( - set TORCH_CUDA_ARCH_LIST=3.5;5.0+PTX;6.0;6.1;7.0;7.5 - set TORCH_NVCC_FLAGS=-Xfatbin -compress-all - ) ELSE ( - set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 - ) - - set "CUDA_PATH=%CUDA_PATH_V10_0%" - set "PATH=%CUDA_PATH_V10_0%\bin;%PATH%" -) - -:optcheck - -IF "%BUILD_VISION%" == "" ( - call internal\check_opts.bat - IF ERRORLEVEL 1 goto eof - - call internal\copy.bat - IF ERRORLEVEL 1 goto eof -) - -call internal\setup.bat -IF ERRORLEVEL 1 goto eof - -:eof diff --git a/packaging/windows/old/cuda90.bat b/packaging/windows/old/cuda90.bat deleted file mode 100644 index fe0294812e2..00000000000 --- a/packaging/windows/old/cuda90.bat +++ /dev/null @@ -1,59 +0,0 @@ -@echo off - -IF NOT "%BUILD_VISION%" == "" ( - set MODULE_NAME=vision -) ELSE ( - set MODULE_NAME=pytorch -) - -IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( - call internal\clone.bat - cd .. - IF ERRORLEVEL 1 goto eof -) ELSE ( - call internal\clean.bat -) - -call internal\check_deps.bat -IF ERRORLEVEL 1 goto eof - -REM Check for optional components - -set NO_CUDA= -set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 - -IF "%NVTOOLSEXT_PATH%"=="" ( - echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing - exit /b 1 - goto optcheck -) - -IF "%CUDA_PATH_V9_0%"=="" ( - echo CUDA 9 not found, failing - exit /b 1 -) ELSE ( - IF "%BUILD_VISION%" == "" ( - set TORCH_CUDA_ARCH_LIST=3.5;5.0+PTX;6.0;7.0 - set TORCH_NVCC_FLAGS=-Xfatbin -compress-all - ) ELSE ( - set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50 - ) - - set "CUDA_PATH=%CUDA_PATH_V9_0%" - set "PATH=%CUDA_PATH_V9_0%\bin;%PATH%" -) - -:optcheck - -IF "%BUILD_VISION%" == "" ( - call internal\check_opts.bat - IF ERRORLEVEL 1 goto eof - - call internal\copy.bat - IF ERRORLEVEL 1 goto eof -) - -call internal\setup.bat -IF ERRORLEVEL 1 goto eof - -:eof diff --git a/packaging/windows/templates/auth_task.yml b/packaging/windows/templates/auth_task.yml deleted file mode 100644 index ece66412ff4..00000000000 --- a/packaging/windows/templates/auth_task.yml +++ /dev/null @@ -1,17 +0,0 @@ -jobs: -- job: 'VSTS_Auth_Task' - timeoutInMinutes: 5 - cancelTimeoutInMinutes: 5 - variables: - - group: 'peterjc-vsts-token' - - pool: - vmImage: 'win1803' - - steps: - - checkout: self - clean: true - - - template: vsts_auth.yml - parameters: - auth: $(vsts_auth) diff --git a/packaging/windows/templates/build_conda.yml b/packaging/windows/templates/build_conda.yml deleted file mode 100644 index 2d88271ad33..00000000000 --- a/packaging/windows/templates/build_conda.yml +++ /dev/null @@ -1,15 +0,0 @@ -parameters: - msagent: false - -steps: -- bash: 'find . -name "*.sh" -exec dos2unix {} +' - displayName: Replace file endings - -- script: 'if not exist %PYTORCH_FINAL_PACKAGE_DIR% mkdir %PYTORCH_FINAL_PACKAGE_DIR%' - displayName: 'Create final package directory' - -- bash: './packaging/conda/build_vision.sh $CUDA_VERSION $TORCHVISION_BUILD_VERSION $TORCHVISION_BUILD_NUMBER' - displayName: Build - env: - ${{ if eq(parameters.msagent, 'true') }}: - MAX_JOBS: 2 diff --git a/packaging/windows/templates/build_task.yml b/packaging/windows/templates/build_task.yml deleted file mode 100644 index e595662d313..00000000000 --- a/packaging/windows/templates/build_task.yml +++ /dev/null @@ -1,140 +0,0 @@ -parameters: - package: '' - spec: '' - jobDesc: '' - packageDesc: '' - msagent: true - cpuEnabled: true - cudaEnabled: true - condaEnabled: true - wheelsEnabled: true - override: false - -jobs: -- job: 'Windows_${{ parameters.spec }}_${{ parameters.package }}_Build' - timeoutInMinutes: 60 - cancelTimeoutInMinutes: 5 - condition: > - or(and(eq('${{ parameters.package }}', 'Conda'), eq('${{ parameters.spec }}', 'CPU'), - eq('${{ parameters.condaEnabled }}', 'true'), eq('${{ parameters.cpuEnabled }}', 'true')), - and(eq('${{ parameters.package }}', 'Wheels'), eq('${{ parameters.spec }}', 'CPU'), - eq('${{ parameters.wheelsEnabled }}', 'true'), eq('${{ parameters.cpuEnabled }}', 'true')), - and(eq('${{ parameters.package }}', 'Conda'), eq('${{ parameters.spec }}', 'CUDA'), - eq('${{ parameters.condaEnabled }}', 'true'), eq('${{ parameters.cudaEnabled }}', 'true')), - and(eq('${{ parameters.package }}', 'Wheels'), eq('${{ parameters.spec }}', 'CUDA'), - eq('${{ parameters.wheelsEnabled }}', 'true'), eq('${{ parameters.cudaEnabled }}', 'true'))) - variables: - - ${{ if eq(parameters.override, 'true') }}: - - name: TORCHVISION_BUILD_NUMBER - value: 1 - - name: PYTORCH_REPO - value: 'pytorch' - - name: PYTORCH_BRANCH - value: 'v0.4.0' - - ${{ if eq(parameters.msagent, 'true') }}: - - name: USE_SCCACHE - value: 0 - - ${{ if eq(parameters.msagent, 'false') }}: - - name: USE_SCCACHE - value: 1 - - ${{ if eq(parameters.package, 'Conda') }}: - - group: peterjc_anaconda_token - - name: PYTORCH_FINAL_PACKAGE_DIR - value: '$(Build.Repository.LocalPath)\packaging\windows\output' - - strategy: - maxParallel: 10 - matrix: - ${{ if eq(parameters.spec, 'CPU') }}: - PY3.5: - DESIRED_PYTHON: 3.5 - CUDA_VERSION: cpu - PY3.6: - DESIRED_PYTHON: 3.6 - CUDA_VERSION: cpu - PY3.7: - DESIRED_PYTHON: 3.7 - CUDA_VERSION: cpu - ${{ if ne(parameters.spec, 'CPU') }}: - PY3.5_92: - DESIRED_PYTHON: 3.5 - CUDA_VERSION: 92 - PY3.6_92: - DESIRED_PYTHON: 3.6 - CUDA_VERSION: 92 - PY3.7_92: - DESIRED_PYTHON: 3.7 - CUDA_VERSION: 92 - PY3.5_101: - DESIRED_PYTHON: 3.5 - CUDA_VERSION: 101 - PY3.6_101: - DESIRED_PYTHON: 3.6 - CUDA_VERSION: 101 - PY3.7_101: - DESIRED_PYTHON: 3.7 - CUDA_VERSION: 101 - - pool: - ${{ if eq(parameters.msagent, 'true') }}: - vmImage: 'win1803' - ${{ if eq(parameters.msagent, 'false') }}: - name: 'release' - - steps: - - checkout: self - clean: true - - - template: setup_env_for_msagent.yml - parameters: - msagent: ${{ parameters.msagent }} - - # - ${{ if and(eq(parameters.override, 'true'), eq(parameters.package, 'Wheels')) }}: - # - template: override_pytorch_version.yml - - - template: setup_nightly_variables.yml - parameters: - package: ${{ parameters.package }} - - - ${{ if eq(parameters.package, 'Wheels') }}: - - template: build_wheels.yml - parameters: - msagent: ${{ parameters.msagent }} - - - ${{ if eq(parameters.package, 'Conda') }}: - - template: build_conda.yml - parameters: - msagent: ${{ parameters.msagent }} - - - ${{ if or(eq(parameters.package, 'Wheels'), eq(parameters.package, 'Conda')) }}: - - template: publish_test_results.yml - parameters: - msagent: ${{ parameters.msagent }} - - # If you want to upload binaries to S3 & Anaconda Cloud, please uncomment this section. - - ${{ if and(eq(parameters.package, 'Wheels'), eq(parameters.spec, 'CPU')) }}: - - template: upload_to_s3.yml - parameters: - cuVer: '$(CUDA_VERSION)' - cudaVer: '$(CUDA_VERSION)' - - - ${{ if and(eq(parameters.package, 'Wheels'), ne(parameters.spec, 'CPU')) }}: - - template: upload_to_s3.yml - parameters: - cuVer: 'cu$(CUDA_VERSION)' - cudaVer: 'cuda$(CUDA_VERSION)' - - - ${{ if eq(parameters.package, 'Conda') }}: - - template: upload_to_conda.yml - parameters: - user: $(peterjc_conda_username) - pass: $(peterjc_conda_password) - - # If you want to upload binaries to Azure Git, please uncomment this section. - # - ${{ if or(eq(parameters.package, 'Wheels'), eq(parameters.package, 'Conda')) }}: - # - template: publish_test_results.yml - # parameters: - # msagent: ${{ parameters.msagent }} - # - template: publish_packages.yml - # parameters: - # package: ${{ parameters.package }} diff --git a/packaging/windows/templates/build_wheels.yml b/packaging/windows/templates/build_wheels.yml deleted file mode 100644 index 05c5712e334..00000000000 --- a/packaging/windows/templates/build_wheels.yml +++ /dev/null @@ -1,9 +0,0 @@ -parameters: - msagent: false - -steps: -- script: 'call packaging/windows/build_vision.bat %CUDA_VERSION% %TORCHVISION_BUILD_VERSION% %TORCHVISION_BUILD_NUMBER%' - displayName: Build - env: - ${{ if eq(parameters.msagent, 'true') }}: - MAX_JOBS: 2 diff --git a/packaging/windows/templates/linux_build_task.yml b/packaging/windows/templates/linux_build_task.yml deleted file mode 100644 index 0b32892791a..00000000000 --- a/packaging/windows/templates/linux_build_task.yml +++ /dev/null @@ -1,38 +0,0 @@ -parameters: - msagent: true - enabled: false - -jobs: -- job: 'Linux_CPU_Conda_Build' - timeoutInMinutes: 0 - cancelTimeoutInMinutes: 5 - condition: ${{ eq(parameters.enabled, 'true') }} - variables: - CUDA_VERSION: cpu - TORCH_CONDA_BUILD_FOLDER: pytorch-nightly - PYTORCH_FINAL_PACKAGE_DIR: '$(Build.Repository.LocalPath)/output' - - strategy: - maxParallel: 10 - matrix: - PY3.5: - DESIRED_PYTHON: 3.5 - - pool: - vmImage: 'ubuntu-16.04' - - steps: - - checkout: self - clean: true - - - script: 'sudo apt-get install p7zip-full' - displayName: 'Install 7Zip' - - - task: CondaEnvironment@1 - displayName: 'Install conda-build' - inputs: - packageSpecs: 'conda-build' - - - template: build_conda.yml - parameters: - msagent: ${{ parameters.msagent }} diff --git a/packaging/windows/templates/override_pytorch_version.yml b/packaging/windows/templates/override_pytorch_version.yml deleted file mode 100644 index 8af93ae43a4..00000000000 --- a/packaging/windows/templates/override_pytorch_version.yml +++ /dev/null @@ -1,6 +0,0 @@ -steps: -- script: 'windows/internal/override_pytorch_version.bat' - displayName: 'Override PyTorch Build Version for Wheels' - -- script: 'echo $(PYTORCH_BUILD_VERSION)' - displayName: 'Show PyTorch Build Version' diff --git a/packaging/windows/templates/publish_packages.yml b/packaging/windows/templates/publish_packages.yml deleted file mode 100644 index 51ce8247bf7..00000000000 --- a/packaging/windows/templates/publish_packages.yml +++ /dev/null @@ -1,8 +0,0 @@ -parameters: - package: '' - -steps: -- script: 'packaging/windows/internal/publish.bat' - displayName: 'Upload packages to Azure DevOps Repo' - env: - PACKAGEFULLNAME: ${{ parameters.package }} diff --git a/packaging/windows/templates/publish_test_results.yml b/packaging/windows/templates/publish_test_results.yml deleted file mode 100644 index 1e0dc0215d3..00000000000 --- a/packaging/windows/templates/publish_test_results.yml +++ /dev/null @@ -1,6 +0,0 @@ -steps: -- task: PublishTestResults@2 # No test results to publish - inputs: - testResultsFiles: 'windows/pytorch/test/**/*.xml' - testRunTitle: 'Publish test results' - enabled: false diff --git a/packaging/windows/templates/setup_env_for_msagent.yml b/packaging/windows/templates/setup_env_for_msagent.yml deleted file mode 100644 index 377734fa3db..00000000000 --- a/packaging/windows/templates/setup_env_for_msagent.yml +++ /dev/null @@ -1,25 +0,0 @@ -parameters: - msagent: false - -steps: -- ${{ if eq(parameters.msagent, 'true') }}: - - task: BatchScript@1 - displayName: 'Install 7Zip & cURL' - inputs: - filename: 'packaging/windows/internal/dep_install.bat' - - modifyEnvironment: true - - - task: BatchScript@1 - displayName: 'Install Visual Studio 2017' - inputs: - filename: 'packaging/windows/internal/vs_install.bat' - - modifyEnvironment: true - - - task: BatchScript@1 - displayName: 'Install CUDA' - inputs: - filename: 'packaging/windows/internal/cuda_install.bat' - - modifyEnvironment: true diff --git a/packaging/windows/templates/setup_nightly_variables.yml b/packaging/windows/templates/setup_nightly_variables.yml deleted file mode 100644 index 94b2fe934ce..00000000000 --- a/packaging/windows/templates/setup_nightly_variables.yml +++ /dev/null @@ -1,11 +0,0 @@ -parameters: - package: '' - -steps: -- task: BatchScript@1 - displayName: 'Setup nightly variables' - inputs: - filename: 'packaging/windows/internal/nightly_defaults.bat' - arguments: ${{ parameters.package }} - - modifyEnvironment: true diff --git a/packaging/windows/templates/upload_to_conda.yml b/packaging/windows/templates/upload_to_conda.yml deleted file mode 100644 index dc172bcf878..00000000000 --- a/packaging/windows/templates/upload_to_conda.yml +++ /dev/null @@ -1,10 +0,0 @@ -parameters: - user: '' - pass: '' - -steps: -- script: 'call packaging/windows/internal/upload.bat' - displayName: 'Upload packages to Anaconda Cloud' - env: - PYTORCH_ANACONDA_USERNAME: ${{ parameters.user }} - PYTORCH_ANACONDA_PASSWORD: ${{ parameters.pass }} diff --git a/packaging/windows/templates/upload_to_s3.yml b/packaging/windows/templates/upload_to_s3.yml deleted file mode 100644 index a31bcb15ae1..00000000000 --- a/packaging/windows/templates/upload_to_s3.yml +++ /dev/null @@ -1,15 +0,0 @@ -parameters: - cuVer: '' - cudaVer: '' - -steps: -- task: AmazonWebServices.aws-vsts-tools.S3Upload.S3Upload@1 - displayName: 'Upload ${{ parameters.cuVer }} wheel to S3' - inputs: - awsCredentials: 'Pytorch S3 bucket' - bucketName: 'pytorch' - sourceFolder: 'packaging/windows/output/${{ parameters.cudaVer }}' - globExpressions: '*.whl' - targetFolder: 'whl/nightly/${{ parameters.cuVer }}/' - filesAcl: 'public-read' - flattenFolders: 'true' diff --git a/packaging/windows/templates/vsts_auth.yml b/packaging/windows/templates/vsts_auth.yml deleted file mode 100644 index fde767d7f12..00000000000 --- a/packaging/windows/templates/vsts_auth.yml +++ /dev/null @@ -1,8 +0,0 @@ -parameters: - auth: '' - -steps: -- script: 'call packaging/windows/internal/auth.bat' - displayName: 'Sign in to Azure Pipelines' - env: - VSTS_AUTH: ${{ parameters.auth }} diff --git a/references/classification/README.md b/references/classification/README.md index acc2b0b4ed0..7a3144b7cac 100644 --- a/references/classification/README.md +++ b/references/classification/README.md @@ -4,7 +4,31 @@ This folder contains reference training scripts for image classification. They serve as a log of how to train specific models, as provide baseline training and evaluation scripts to quickly bootstrap research. -Except otherwise noted, all models have been trained on 8x V100 GPUs. +Except otherwise noted, all models have been trained on 8x V100 GPUs with +the following parameters: + +| Parameter | value | +| ------------------------ | ------ | +| `--batch_size` | `32` | +| `--epochs` | `90` | +| `--lr` | `0.1` | +| `--momentum` | `0.9` | +| `--wd`, `--weight-decay` | `1e-4` | +| `--lr-step-size` | `30` | +| `--lr-gamma` | `0.1` | + +### AlexNet and VGG + +Since `AlexNet` and the original `VGG` architectures do not include batch +normalization, the default initial learning rate `--lr 0.1` is to high. + +``` +python main.py --model $MODEL --lr 1e-2 +``` + +Here `$MODEL` is one of `alexnet`, `vgg11`, `vgg13`, `vgg16` or `vgg19`. Note +that `vgg11_bn`, `vgg13_bn`, `vgg16_bn`, and `vgg19_bn` include batch +normalization and thus are trained with the default parameters. ### ResNext-50 32x4d ``` @@ -29,6 +53,20 @@ python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\ --lr-step-size 1 --lr-gamma 0.98 ``` + +### MobileNetV3 Large & Small +``` +python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\ + --model $MODEL --epochs 600 --opt rmsprop --batch-size 128 --lr 0.064\ + --wd 0.00001 --lr-step-size 2 --lr-gamma 0.973 --auto-augment imagenet --random-erase 0.2 +``` + +Here `$MODEL` is one of `mobilenet_v3_large` or `mobilenet_v3_small`. + +Then we averaged the parameters of the last 3 checkpoints that improved the Acc@1. See [#3182](https://github.com/pytorch/vision/pull/3182) +and [#3354](https://github.com/pytorch/vision/pull/3354) for details. + + ## Mixed precision training Automatic Mixed Precision (AMP) training on GPU for Pytorch can be enabled with the [NVIDIA Apex extension](https://github.com/NVIDIA/apex). @@ -51,6 +89,10 @@ For all post training quantized models (All quantized models except mobilenet-v2 4. eval_batch_size: 128 5. backend: 'fbgemm' +``` +python train_quantization.py --device='cpu' --post-training-quantize --backend='fbgemm' --model='' +``` + For Mobilenet-v2, the model was trained with quantization aware training, the settings used are: 1. num_workers: 16 2. batch_size: 32 @@ -63,28 +105,38 @@ For Mobilenet-v2, the model was trained with quantization aware training, the se 9. momentum: 0.9 10. lr_step_size:30 11. lr_gamma: 0.1 +12. weight-decay: 0.0001 + +``` +python -m torch.distributed.launch --nproc_per_node=8 --use_env train_quantization.py --model='mobilenet_v2' +``` Training converges at about 10 epochs. -For post training quant, device is set to CPU. For training, the device is set to CUDA +For Mobilenet-v3 Large, the model was trained with quantization aware training, the settings used are: +1. num_workers: 16 +2. batch_size: 32 +3. eval_batch_size: 128 +4. backend: 'qnnpack' +5. learning-rate: 0.001 +6. num_epochs: 90 +7. num_observer_update_epochs:4 +8. num_batch_norm_update_epochs:3 +9. momentum: 0.9 +10. lr_step_size:30 +11. lr_gamma: 0.1 +12. weight-decay: 0.00001 -### Command to evaluate quantized models using the pre-trained weights: -For all quantized models except inception_v3: ``` -python references/classification/train_quantization.py --data-path='imagenet_full_size/' \ - --device='cpu' --test-only --backend='fbgemm' --model='' +python -m torch.distributed.launch --nproc_per_node=8 --use_env train_quantization.py --model='mobilenet_v3_large' \ + --wd 0.00001 --lr 0.001 ``` -For inception_v3, since it expects tensors with a size of N x 3 x 299 x 299, before running above command, -need to change the input size of dataset_test in train.py to: +For post training quant, device is set to CPU. For training, the device is set to CUDA. + +### Command to evaluate quantized models using the pre-trained weights: + ``` -dataset_test = torchvision.datasets.ImageFolder( - valdir, - transforms.Compose([ - transforms.Resize(342), - transforms.CenterCrop(299), - transforms.ToTensor(), - normalize, - ])) +python train_quantization.py --device='cpu' --test-only --backend='' --model='' ``` diff --git a/references/classification/presets.py b/references/classification/presets.py new file mode 100644 index 00000000000..6bb389ba8db --- /dev/null +++ b/references/classification/presets.py @@ -0,0 +1,37 @@ +from torchvision.transforms import autoaugment, transforms + + +class ClassificationPresetTrain: + def __init__(self, crop_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), hflip_prob=0.5, + auto_augment_policy=None, random_erase_prob=0.0): + trans = [transforms.RandomResizedCrop(crop_size)] + if hflip_prob > 0: + trans.append(transforms.RandomHorizontalFlip(hflip_prob)) + if auto_augment_policy is not None: + aa_policy = autoaugment.AutoAugmentPolicy(auto_augment_policy) + trans.append(autoaugment.AutoAugment(policy=aa_policy)) + trans.extend([ + transforms.ToTensor(), + transforms.Normalize(mean=mean, std=std), + ]) + if random_erase_prob > 0: + trans.append(transforms.RandomErasing(p=random_erase_prob)) + + self.transforms = transforms.Compose(trans) + + def __call__(self, img): + return self.transforms(img) + + +class ClassificationPresetEval: + def __init__(self, crop_size, resize_size=256, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)): + + self.transforms = transforms.Compose([ + transforms.Resize(resize_size), + transforms.CenterCrop(crop_size), + transforms.ToTensor(), + transforms.Normalize(mean=mean, std=std), + ]) + + def __call__(self, img): + return self.transforms(img) diff --git a/references/classification/train.py b/references/classification/train.py index 480092a0331..232c3b5556b 100644 --- a/references/classification/train.py +++ b/references/classification/train.py @@ -1,15 +1,13 @@ -from __future__ import print_function import datetime import os import time -import sys import torch import torch.utils.data from torch import nn import torchvision -from torchvision import transforms +import presets import utils try: @@ -81,29 +79,26 @@ def _get_cache_path(filepath): return cache_path -def load_data(traindir, valdir, cache_dataset, distributed): +def load_data(traindir, valdir, args): # Data loading code print("Loading data") - normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225]) + resize_size, crop_size = (342, 299) if args.model == 'inception_v3' else (256, 224) print("Loading training data") st = time.time() cache_path = _get_cache_path(traindir) - if cache_dataset and os.path.exists(cache_path): + if args.cache_dataset and os.path.exists(cache_path): # Attention, as the transforms are also cached! print("Loading dataset_train from {}".format(cache_path)) dataset, _ = torch.load(cache_path) else: + auto_augment_policy = getattr(args, "auto_augment", None) + random_erase_prob = getattr(args, "random_erase", 0.0) dataset = torchvision.datasets.ImageFolder( traindir, - transforms.Compose([ - transforms.RandomResizedCrop(224), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - normalize, - ])) - if cache_dataset: + presets.ClassificationPresetTrain(crop_size=crop_size, auto_augment_policy=auto_augment_policy, + random_erase_prob=random_erase_prob)) + if args.cache_dataset: print("Saving dataset_train to {}".format(cache_path)) utils.mkdir(os.path.dirname(cache_path)) utils.save_on_master((dataset, traindir), cache_path) @@ -111,26 +106,21 @@ def load_data(traindir, valdir, cache_dataset, distributed): print("Loading validation data") cache_path = _get_cache_path(valdir) - if cache_dataset and os.path.exists(cache_path): + if args.cache_dataset and os.path.exists(cache_path): # Attention, as the transforms are also cached! print("Loading dataset_test from {}".format(cache_path)) dataset_test, _ = torch.load(cache_path) else: dataset_test = torchvision.datasets.ImageFolder( valdir, - transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - normalize, - ])) - if cache_dataset: + presets.ClassificationPresetEval(crop_size=crop_size, resize_size=resize_size)) + if args.cache_dataset: print("Saving dataset_test to {}".format(cache_path)) utils.mkdir(os.path.dirname(cache_path)) utils.save_on_master((dataset_test, valdir), cache_path) print("Creating data loaders") - if distributed: + if args.distributed: train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test) else: @@ -141,12 +131,9 @@ def load_data(traindir, valdir, cache_dataset, distributed): def main(args): - if args.apex: - if sys.version_info < (3, 0): - raise RuntimeError("Apex currently only supports Python 3. Aborting.") - if amp is None: - raise RuntimeError("Failed to import apex. Please install apex from https://www.github.com/nvidia/apex " - "to enable mixed-precision training.") + if args.apex and amp is None: + raise RuntimeError("Failed to import apex. Please install apex from https://www.github.com/nvidia/apex " + "to enable mixed-precision training.") if args.output_dir: utils.mkdir(args.output_dir) @@ -160,8 +147,7 @@ def main(args): train_dir = os.path.join(args.data_path, 'train') val_dir = os.path.join(args.data_path, 'val') - dataset, dataset_test, train_sampler, test_sampler = load_data(train_dir, val_dir, - args.cache_dataset, args.distributed) + dataset, dataset_test, train_sampler, test_sampler = load_data(train_dir, val_dir, args) data_loader = torch.utils.data.DataLoader( dataset, batch_size=args.batch_size, sampler=train_sampler, num_workers=args.workers, pin_memory=True) @@ -178,8 +164,15 @@ def main(args): criterion = nn.CrossEntropyLoss() - optimizer = torch.optim.SGD( - model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) + opt_name = args.opt.lower() + if opt_name == 'sgd': + optimizer = torch.optim.SGD( + model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) + elif opt_name == 'rmsprop': + optimizer = torch.optim.RMSprop(model.parameters(), lr=args.lr, momentum=args.momentum, + weight_decay=args.weight_decay, eps=0.0316, alpha=0.9) + else: + raise RuntimeError("Invalid optimizer {}. Only SGD and RMSprop are supported.".format(args.opt)) if args.apex: model, optimizer = amp.initialize(model, optimizer, @@ -243,6 +236,7 @@ def parse_args(): help='number of total epochs to run') parser.add_argument('-j', '--workers', default=16, type=int, metavar='N', help='number of data loading workers (default: 16)') + parser.add_argument('--opt', default='sgd', type=str, help='optimizer') parser.add_argument('--lr', default=0.1, type=float, help='initial learning rate') parser.add_argument('--momentum', default=0.9, type=float, metavar='M', help='momentum') @@ -280,6 +274,8 @@ def parse_args(): help="Use pre-trained models from the modelzoo", action="store_true", ) + parser.add_argument('--auto-augment', default=None, help='auto augment policy (default: None)') + parser.add_argument('--random-erase', default=0.0, type=float, help='random erasing probability (default: 0.0)') # Mixed precision training parameters parser.add_argument('--apex', action='store_true', diff --git a/references/classification/train_quantization.py b/references/classification/train_quantization.py index 22621fe2404..dd41d0b3d1f 100644 --- a/references/classification/train_quantization.py +++ b/references/classification/train_quantization.py @@ -1,8 +1,6 @@ -from __future__ import print_function import datetime import os import time -import sys import copy import torch @@ -39,8 +37,7 @@ def main(args): train_dir = os.path.join(args.data_path, 'train') val_dir = os.path.join(args.data_path, 'val') - dataset, dataset_test, train_sampler, test_sampler = load_data(train_dir, val_dir, - args.cache_dataset, args.distributed) + dataset, dataset_test, train_sampler, test_sampler = load_data(train_dir, val_dir, args) data_loader = torch.utils.data.DataLoader( dataset, batch_size=args.batch_size, sampler=train_sampler, num_workers=args.workers, pin_memory=True) @@ -59,6 +56,9 @@ def main(args): model.qconfig = torch.quantization.get_default_qat_qconfig(args.backend) torch.quantization.prepare_qat(model, inplace=True) + if args.distributed and args.sync_bn: + model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) + optimizer = torch.optim.SGD( model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) @@ -130,7 +130,7 @@ def main(args): print('Evaluate QAT model') evaluate(model, criterion, data_loader_test, device=device) - quantized_eval_model = copy.deepcopy(model) + quantized_eval_model = copy.deepcopy(model_without_ddp) quantized_eval_model.eval() quantized_eval_model.to(torch.device('cpu')) torch.quantization.convert(quantized_eval_model, inplace=True) @@ -224,6 +224,12 @@ def parse_args(): It also serializes the transforms", action="store_true", ) + parser.add_argument( + "--sync-bn", + dest="sync_bn", + help="Use sync batch norm", + action="store_true", + ) parser.add_argument( "--test-only", dest="test_only", diff --git a/references/classification/utils.py b/references/classification/utils.py index 5ea6dfef341..4e53ed1d3d7 100644 --- a/references/classification/utils.py +++ b/references/classification/utils.py @@ -1,6 +1,7 @@ -from __future__ import print_function -from collections import defaultdict, deque +from collections import defaultdict, deque, OrderedDict +import copy import datetime +import hashlib import time import torch import torch.distributed as dist @@ -253,3 +254,126 @@ def init_distributed_mode(args): torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, rank=args.rank) setup_for_distributed(args.rank == 0) + + +def average_checkpoints(inputs): + """Loads checkpoints from inputs and returns a model with averaged weights. Original implementation taken from: + https://github.com/pytorch/fairseq/blob/a48f235636557b8d3bc4922a6fa90f3a0fa57955/scripts/average_checkpoints.py#L16 + + Args: + inputs (List[str]): An iterable of string paths of checkpoints to load from. + Returns: + A dict of string keys mapping to various values. The 'model' key + from the returned dict should correspond to an OrderedDict mapping + string parameter names to torch Tensors. + """ + params_dict = OrderedDict() + params_keys = None + new_state = None + num_models = len(inputs) + for fpath in inputs: + with open(fpath, "rb") as f: + state = torch.load( + f, + map_location=( + lambda s, _: torch.serialization.default_restore_location(s, "cpu") + ), + ) + # Copies over the settings from the first checkpoint + if new_state is None: + new_state = state + model_params = state["model"] + model_params_keys = list(model_params.keys()) + if params_keys is None: + params_keys = model_params_keys + elif params_keys != model_params_keys: + raise KeyError( + "For checkpoint {}, expected list of params: {}, " + "but found: {}".format(f, params_keys, model_params_keys) + ) + for k in params_keys: + p = model_params[k] + if isinstance(p, torch.HalfTensor): + p = p.float() + if k not in params_dict: + params_dict[k] = p.clone() + # NOTE: clone() is needed in case of p is a shared parameter + else: + params_dict[k] += p + averaged_params = OrderedDict() + for k, v in params_dict.items(): + averaged_params[k] = v + if averaged_params[k].is_floating_point(): + averaged_params[k].div_(num_models) + else: + averaged_params[k] //= num_models + new_state["model"] = averaged_params + return new_state + + +def store_model_weights(model, checkpoint_path, checkpoint_key='model', strict=True): + """ + This method can be used to prepare weights files for new models. It receives as + input a model architecture and a checkpoint from the training script and produces + a file with the weights ready for release. + + Examples: + from torchvision import models as M + + # Classification + model = M.mobilenet_v3_large(pretrained=False) + print(store_model_weights(model, './class.pth')) + + # Quantized Classification + model = M.quantization.mobilenet_v3_large(pretrained=False, quantize=False) + model.fuse_model() + model.qconfig = torch.quantization.get_default_qat_qconfig('qnnpack') + _ = torch.quantization.prepare_qat(model, inplace=True) + print(store_model_weights(model, './qat.pth')) + + # Object Detection + model = M.detection.fasterrcnn_mobilenet_v3_large_fpn(pretrained=False, pretrained_backbone=False) + print(store_model_weights(model, './obj.pth')) + + # Segmentation + model = M.segmentation.deeplabv3_mobilenet_v3_large(pretrained=False, pretrained_backbone=False, aux_loss=True) + print(store_model_weights(model, './segm.pth', strict=False)) + + Args: + model (pytorch.nn.Module): The model on which the weights will be loaded for validation purposes. + checkpoint_path (str): The path of the checkpoint we will load. + checkpoint_key (str, optional): The key of the checkpoint where the model weights are stored. + Default: "model". + strict (bool): whether to strictly enforce that the keys + in :attr:`state_dict` match the keys returned by this module's + :meth:`~torch.nn.Module.state_dict` function. Default: ``True`` + + Returns: + output_path (str): The location where the weights are saved. + """ + # Store the new model next to the checkpoint_path + checkpoint_path = os.path.abspath(checkpoint_path) + output_dir = os.path.dirname(checkpoint_path) + + # Deep copy to avoid side-effects on the model object. + model = copy.deepcopy(model) + checkpoint = torch.load(checkpoint_path, map_location='cpu') + + # Load the weights to the model to validate that everything works + # and remove unnecessary weights (such as auxiliaries, etc) + model.load_state_dict(checkpoint[checkpoint_key], strict=strict) + + tmp_path = os.path.join(output_dir, str(model.__hash__())) + torch.save(model.state_dict(), tmp_path) + + sha256_hash = hashlib.sha256() + with open(tmp_path, "rb") as f: + # Read and update hash string value in blocks of 4K + for byte_block in iter(lambda: f.read(4096), b""): + sha256_hash.update(byte_block) + hh = sha256_hash.hexdigest() + + output_path = os.path.join(output_dir, "weights-" + str(hh[:8]) + ".pth") + os.replace(tmp_path, output_path) + + return output_path diff --git a/references/detection/README.md b/references/detection/README.md new file mode 100644 index 00000000000..c8eaf46da5f --- /dev/null +++ b/references/detection/README.md @@ -0,0 +1,66 @@ +# Object detection reference training scripts + +This folder contains reference training scripts for object detection. +They serve as a log of how to train specific models, to provide baseline +training and evaluation scripts to quickly bootstrap research. + +To execute the example commands below you must install the following: + +``` +cython +pycocotools +matplotlib +``` + +You must modify the following flags: + +`--data-path=/path/to/coco/dataset` + +`--nproc_per_node=` + +Except otherwise noted, all models have been trained on 8x V100 GPUs. + +### Faster R-CNN ResNet-50 FPN +``` +python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\ + --dataset coco --model fasterrcnn_resnet50_fpn --epochs 26\ + --lr-steps 16 22 --aspect-ratio-group-factor 3 +``` + +### Faster R-CNN MobileNetV3-Large FPN +``` +python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\ + --dataset coco --model fasterrcnn_mobilenet_v3_large_fpn --epochs 26\ + --lr-steps 16 22 --aspect-ratio-group-factor 3 +``` + +### Faster R-CNN MobileNetV3-Large 320 FPN +``` +python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\ + --dataset coco --model fasterrcnn_mobilenet_v3_large_320_fpn --epochs 26\ + --lr-steps 16 22 --aspect-ratio-group-factor 3 +``` + +### RetinaNet +``` +python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\ + --dataset coco --model retinanet_resnet50_fpn --epochs 26\ + --lr-steps 16 22 --aspect-ratio-group-factor 3 --lr 0.01 +``` + + +### Mask R-CNN +``` +python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\ + --dataset coco --model maskrcnn_resnet50_fpn --epochs 26\ + --lr-steps 16 22 --aspect-ratio-group-factor 3 +``` + + +### Keypoint R-CNN +``` +python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\ + --dataset coco_kp --model keypointrcnn_resnet50_fpn --epochs 46\ + --lr-steps 36 43 --aspect-ratio-group-factor 3 +``` + diff --git a/references/detection/coco_eval.py b/references/detection/coco_eval.py index d758a64a909..09648f29ae4 100644 --- a/references/detection/coco_eval.py +++ b/references/detection/coco_eval.py @@ -238,8 +238,11 @@ def createIndex(self): def loadRes(self, resFile): """ Load result file and return a result api object. - :param resFile (str) : file name of result file - :return: res (obj) : result api object + Args: + self (obj): coco object with ground truth annotations + resFile (str): file name of result file + Returns: + res (obj): result api object """ res = COCO() res.dataset['images'] = [img for img in self.dataset['images']] diff --git a/references/detection/engine.py b/references/detection/engine.py index 68c39a4fc1b..9f34336b0cc 100644 --- a/references/detection/engine.py +++ b/references/detection/engine.py @@ -52,6 +52,8 @@ def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq): metric_logger.update(loss=losses_reduced, **loss_dict_reduced) metric_logger.update(lr=optimizer.param_groups[0]["lr"]) + return metric_logger + def _get_iou_types(model): model_without_ddp = model @@ -79,13 +81,12 @@ def evaluate(model, data_loader, device): iou_types = _get_iou_types(model) coco_evaluator = CocoEvaluator(coco, iou_types) - for image, targets in metric_logger.log_every(data_loader, 100, header): - image = list(img.to(device) for img in image) - targets = [{k: v.to(device) for k, v in t.items()} for t in targets] + for images, targets in metric_logger.log_every(data_loader, 100, header): + images = list(img.to(device) for img in images) torch.cuda.synchronize() model_time = time.time() - outputs = model(image) + outputs = model(images) outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs] model_time = time.time() - model_time diff --git a/references/detection/group_by_aspect_ratio.py b/references/detection/group_by_aspect_ratio.py index 61694cd63a4..1b76f4c64f7 100644 --- a/references/detection/group_by_aspect_ratio.py +++ b/references/detection/group_by_aspect_ratio.py @@ -1,6 +1,8 @@ import bisect from collections import defaultdict import copy +from itertools import repeat, chain +import math import numpy as np import torch @@ -12,13 +14,19 @@ from PIL import Image +def _repeat_to_at_least(iterable, n): + repeat_times = math.ceil(n / len(iterable)) + repeated = chain.from_iterable(repeat(iterable, repeat_times)) + return list(repeated) + + class GroupedBatchSampler(BatchSampler): """ Wraps another sampler to yield a mini-batch of indices. It enforces that the batch only contain elements from the same group. It also tries to provide mini-batches which follows an ordering which is as close as possible to the ordering from the original sampler. - Arguments: + Args: sampler (Sampler): Base sampler. group_ids (list[int]): If the sampler produces indices in range [0, N), `group_ids` must be a list of `N` ints which contains the group id of each sample. @@ -63,8 +71,8 @@ def __iter__(self): for group_id, _ in sorted(buffer_per_group.items(), key=lambda x: len(x[1]), reverse=True): remaining = self.batch_size - len(buffer_per_group[group_id]) - buffer_per_group[group_id].extend( - samples_per_group[group_id][:remaining]) + samples_from_group_id = _repeat_to_at_least(samples_per_group[group_id], remaining) + buffer_per_group[group_id].extend(samples_from_group_id[:remaining]) assert len(buffer_per_group[group_id]) == self.batch_size yield buffer_per_group[group_id] num_remaining -= 1 diff --git a/references/detection/presets.py b/references/detection/presets.py new file mode 100644 index 00000000000..b0c86ed1265 --- /dev/null +++ b/references/detection/presets.py @@ -0,0 +1,21 @@ +import transforms as T + + +class DetectionPresetTrain: + def __init__(self, hflip_prob=0.5): + trans = [T.ToTensor()] + if hflip_prob > 0: + trans.append(T.RandomHorizontalFlip(hflip_prob)) + + self.transforms = T.Compose(trans) + + def __call__(self, img, target): + return self.transforms(img, target) + + +class DetectionPresetEval: + def __init__(self): + self.transforms = T.ToTensor() + + def __call__(self, img, target): + return self.transforms(img, target) diff --git a/references/detection/train.py b/references/detection/train.py index 3b928611b4f..83fad36d2cc 100644 --- a/references/detection/train.py +++ b/references/detection/train.py @@ -8,6 +8,14 @@ The default hyperparameters are tuned for training on 8 gpus and 2 images per gpu. --lr 0.02 --batch-size 2 --world-size 8 If you use different number of gpus, the learning rate should be changed to 0.02/8*$NGPU. + +On top of that, for training Faster/Mask R-CNN, the default hyperparameters are + --epochs 26 --lr-steps 16 22 --aspect-ratio-group-factor 3 + +Also, if you train Keypoint R-CNN, the default hyperparameters are + --epochs 46 --lr-steps 36 43 --aspect-ratio-group-factor 3 +Because the number of images is smaller in the person keypoint subset of COCO, +the number of epochs should be adapted so that we have the same number of iterations. """ import datetime import os @@ -15,7 +23,6 @@ import torch import torch.utils.data -from torch import nn import torchvision import torchvision.models.detection import torchvision.models.detection.mask_rcnn @@ -25,8 +32,8 @@ from group_by_aspect_ratio import GroupedBatchSampler, create_aspect_ratio_groups from engine import train_one_epoch, evaluate +import presets import utils -import transforms as T def get_dataset(name, image_set, transform, data_path): @@ -41,11 +48,7 @@ def get_dataset(name, image_set, transform, data_path): def get_transform(train): - transforms = [] - transforms.append(T.ToTensor()) - if train: - transforms.append(T.RandomHorizontalFlip(0.5)) - return T.Compose(transforms) + return presets.DetectionPresetTrain() if train else presets.DetectionPresetEval() def main(args): @@ -85,8 +88,14 @@ def main(args): collate_fn=utils.collate_fn) print("Creating model") - model = torchvision.models.detection.__dict__[args.model](num_classes=num_classes, - pretrained=args.pretrained) + kwargs = { + "trainable_backbone_layers": args.trainable_backbone_layers + } + if "rcnn" in args.model: + if args.rpn_score_thresh is not None: + kwargs["rpn_score_thresh"] = args.rpn_score_thresh + model = torchvision.models.detection.__dict__[args.model](num_classes=num_classes, pretrained=args.pretrained, + **kwargs) model.to(device) model_without_ddp = model @@ -106,6 +115,7 @@ def main(args): model_without_ddp.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) + args.start_epoch = checkpoint['epoch'] + 1 if args.test_only: evaluate(model, data_loader_test, device=device) @@ -113,7 +123,7 @@ def main(args): print("Start training") start_time = time.time() - for epoch in range(args.epochs): + for epoch in range(args.start_epoch, args.epochs): if args.distributed: train_sampler.set_epoch(epoch) train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq) @@ -123,7 +133,8 @@ def main(args): 'model': model_without_ddp.state_dict(), 'optimizer': optimizer.state_dict(), 'lr_scheduler': lr_scheduler.state_dict(), - 'args': args}, + 'args': args, + 'epoch': epoch}, os.path.join(args.output_dir, 'model_{}.pth'.format(epoch))) # evaluate after every epoch @@ -145,7 +156,7 @@ def main(args): parser.add_argument('--device', default='cuda', help='device') parser.add_argument('-b', '--batch-size', default=2, type=int, help='images per gpu, the total batch size is $NGPU x batch_size') - parser.add_argument('--epochs', default=13, type=int, metavar='N', + parser.add_argument('--epochs', default=26, type=int, metavar='N', help='number of total epochs to run') parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', help='number of data loading workers (default: 4)') @@ -158,12 +169,16 @@ def main(args): metavar='W', help='weight decay (default: 1e-4)', dest='weight_decay') parser.add_argument('--lr-step-size', default=8, type=int, help='decrease lr every step-size epochs') - parser.add_argument('--lr-steps', default=[8, 11], nargs='+', type=int, help='decrease lr every step-size epochs') + parser.add_argument('--lr-steps', default=[16, 22], nargs='+', type=int, help='decrease lr every step-size epochs') parser.add_argument('--lr-gamma', default=0.1, type=float, help='decrease lr by a factor of lr-gamma') parser.add_argument('--print-freq', default=20, type=int, help='print frequency') parser.add_argument('--output-dir', default='.', help='path where to save') parser.add_argument('--resume', default='', help='resume from checkpoint') - parser.add_argument('--aspect-ratio-group-factor', default=0, type=int) + parser.add_argument('--start_epoch', default=0, type=int, help='start epoch') + parser.add_argument('--aspect-ratio-group-factor', default=3, type=int) + parser.add_argument('--rpn-score-thresh', default=None, type=float, help='rpn score threshold for faster-rcnn') + parser.add_argument('--trainable-backbone-layers', default=None, type=int, + help='number of trainable layers of backbone') parser.add_argument( "--test-only", dest="test_only", diff --git a/references/detection/transforms.py b/references/detection/transforms.py index 73efc92bdef..937ae3c07fc 100644 --- a/references/detection/transforms.py +++ b/references/detection/transforms.py @@ -1,5 +1,4 @@ import random -import torch from torchvision.transforms import functional as F diff --git a/references/detection/utils.py b/references/detection/utils.py index 0e8e8560118..82ae79bc3fb 100644 --- a/references/detection/utils.py +++ b/references/detection/utils.py @@ -1,5 +1,3 @@ -from __future__ import print_function - from collections import defaultdict, deque import datetime import pickle diff --git a/references/segmentation/README.md b/references/segmentation/README.md new file mode 100644 index 00000000000..6e24f836624 --- /dev/null +++ b/references/segmentation/README.md @@ -0,0 +1,43 @@ +# Semantic segmentation reference training scripts + +This folder contains reference training scripts for semantic segmentation. +They serve as a log of how to train specific models, as provide baseline +training and evaluation scripts to quickly bootstrap research. + +All models have been trained on 8x V100 GPUs. + +You must modify the following flags: + +`--data-path=/path/to/dataset` + +`--nproc_per_node=` + +## fcn_resnet50 +``` +python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py --lr 0.02 --dataset coco -b 4 --model fcn_resnet50 --aux-loss +``` + +## fcn_resnet101 +``` +python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py --lr 0.02 --dataset coco -b 4 --model fcn_resnet101 --aux-loss +``` + +## deeplabv3_resnet50 +``` +python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py --lr 0.02 --dataset coco -b 4 --model deeplabv3_resnet50 --aux-loss +``` + +## deeplabv3_resnet101 +``` +python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py --lr 0.02 --dataset coco -b 4 --model deeplabv3_resnet101 --aux-loss +``` + +## deeplabv3_mobilenet_v3_large +``` +python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py --dataset coco -b 4 --model deeplabv3_mobilenet_v3_large --aux-loss --wd 0.000001 +``` + +## lraspp_mobilenet_v3_large +``` +python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py --dataset coco -b 4 --model lraspp_mobilenet_v3_large --wd 0.000001 +``` diff --git a/references/segmentation/presets.py b/references/segmentation/presets.py new file mode 100644 index 00000000000..3bf29c23751 --- /dev/null +++ b/references/segmentation/presets.py @@ -0,0 +1,32 @@ +import transforms as T + + +class SegmentationPresetTrain: + def __init__(self, base_size, crop_size, hflip_prob=0.5, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)): + min_size = int(0.5 * base_size) + max_size = int(2.0 * base_size) + + trans = [T.RandomResize(min_size, max_size)] + if hflip_prob > 0: + trans.append(T.RandomHorizontalFlip(hflip_prob)) + trans.extend([ + T.RandomCrop(crop_size), + T.ToTensor(), + T.Normalize(mean=mean, std=std), + ]) + self.transforms = T.Compose(trans) + + def __call__(self, img, target): + return self.transforms(img, target) + + +class SegmentationPresetEval: + def __init__(self, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)): + self.transforms = T.Compose([ + T.RandomResize(base_size, base_size), + T.ToTensor(), + T.Normalize(mean=mean, std=std), + ]) + + def __call__(self, img, target): + return self.transforms(img, target) diff --git a/references/segmentation/train.py b/references/segmentation/train.py index b1173d5323a..690e248323e 100644 --- a/references/segmentation/train.py +++ b/references/segmentation/train.py @@ -8,17 +8,17 @@ import torchvision from coco_utils import get_coco -import transforms as T +import presets import utils -def get_dataset(name, image_set, transform): +def get_dataset(dir_path, name, image_set, transform): def sbd(*args, **kwargs): return torchvision.datasets.SBDataset(*args, mode='segmentation', **kwargs) paths = { - "voc": ('/datasets01/VOC/060817/', torchvision.datasets.VOCSegmentation, 21), - "voc_aug": ('/datasets01/SBDD/072318/', sbd, 21), - "coco": ('/datasets01/COCO/022719/', get_coco, 21) + "voc": (dir_path, torchvision.datasets.VOCSegmentation, 21), + "voc_aug": (dir_path, sbd, 21), + "coco": (dir_path, get_coco, 21) } p, ds_fn, num_classes = paths[name] @@ -30,18 +30,7 @@ def get_transform(train): base_size = 520 crop_size = 480 - min_size = int((0.5 if train else 1.0) * base_size) - max_size = int((2.0 if train else 1.0) * base_size) - transforms = [] - transforms.append(T.RandomResize(min_size, max_size)) - if train: - transforms.append(T.RandomHorizontalFlip(0.5)) - transforms.append(T.RandomCrop(crop_size)) - transforms.append(T.ToTensor()) - transforms.append(T.Normalize(mean=[0.485, 0.456, 0.406], - std=[0.229, 0.224, 0.225])) - - return T.Compose(transforms) + return presets.SegmentationPresetTrain(base_size, crop_size) if train else presets.SegmentationPresetEval(base_size) def criterion(inputs, target): @@ -101,8 +90,8 @@ def main(args): device = torch.device(args.device) - dataset, num_classes = get_dataset(args.dataset, "train", get_transform(train=True)) - dataset_test, _ = get_dataset(args.dataset, "val", get_transform(train=False)) + dataset, num_classes = get_dataset(args.data_path, args.dataset, "train", get_transform(train=True)) + dataset_test, _ = get_dataset(args.data_path, args.dataset, "val", get_transform(train=False)) if args.distributed: train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) @@ -128,20 +117,11 @@ def main(args): if args.distributed: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) - if args.resume: - checkpoint = torch.load(args.resume, map_location='cpu') - model.load_state_dict(checkpoint['model']) - model_without_ddp = model if args.distributed: model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) model_without_ddp = model.module - if args.test_only: - confmat = evaluate(model, data_loader_test, device=device, num_classes=num_classes) - print(confmat) - return - params_to_optimize = [ {"params": [p for p in model_without_ddp.backbone.parameters() if p.requires_grad]}, {"params": [p for p in model_without_ddp.classifier.parameters() if p.requires_grad]}, @@ -157,8 +137,21 @@ def main(args): optimizer, lambda x: (1 - x / (len(data_loader) * args.epochs)) ** 0.9) + if args.resume: + checkpoint = torch.load(args.resume, map_location='cpu') + model_without_ddp.load_state_dict(checkpoint['model'], strict=not args.test_only) + if not args.test_only: + optimizer.load_state_dict(checkpoint['optimizer']) + lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) + args.start_epoch = checkpoint['epoch'] + 1 + + if args.test_only: + confmat = evaluate(model, data_loader_test, device=device, num_classes=num_classes) + print(confmat) + return + start_time = time.time() - for epoch in range(args.epochs): + for epoch in range(args.start_epoch, args.epochs): if args.distributed: train_sampler.set_epoch(epoch) train_one_epoch(model, criterion, optimizer, data_loader, lr_scheduler, device, epoch, args.print_freq) @@ -168,6 +161,7 @@ def main(args): { 'model': model_without_ddp.state_dict(), 'optimizer': optimizer.state_dict(), + 'lr_scheduler': lr_scheduler.state_dict(), 'epoch': epoch, 'args': args }, @@ -182,7 +176,8 @@ def parse_args(): import argparse parser = argparse.ArgumentParser(description='PyTorch Segmentation Training') - parser.add_argument('--dataset', default='voc', help='dataset') + parser.add_argument('--data-path', default='/datasets01/COCO/022719/', help='dataset path') + parser.add_argument('--dataset', default='coco', help='dataset name') parser.add_argument('--model', default='fcn_resnet101', help='model') parser.add_argument('--aux-loss', action='store_true', help='auxiliar loss') parser.add_argument('--device', default='cuda', help='device') @@ -201,6 +196,8 @@ def parse_args(): parser.add_argument('--print-freq', default=10, type=int, help='print frequency') parser.add_argument('--output-dir', default='.', help='path where to save') parser.add_argument('--resume', default='', help='resume from checkpoint') + parser.add_argument('--start-epoch', default=0, type=int, metavar='N', + help='start epoch') parser.add_argument( "--test-only", dest="test_only", diff --git a/references/segmentation/transforms.py b/references/segmentation/transforms.py index bce4bfbe639..4fe5a5ad147 100644 --- a/references/segmentation/transforms.py +++ b/references/segmentation/transforms.py @@ -78,7 +78,7 @@ def __call__(self, image, target): class ToTensor(object): def __call__(self, image, target): image = F.to_tensor(image) - target = torch.as_tensor(np.asarray(target), dtype=torch.int64) + target = torch.as_tensor(np.array(target), dtype=torch.int64) return image, target diff --git a/references/segmentation/utils.py b/references/segmentation/utils.py index 2719996c808..b67c18052fb 100644 --- a/references/segmentation/utils.py +++ b/references/segmentation/utils.py @@ -1,7 +1,5 @@ -from __future__ import print_function from collections import defaultdict, deque import datetime -import math import time import torch import torch.distributed as dist diff --git a/references/similarity/loss.py b/references/similarity/loss.py index 3e467b74c52..1fa4a89c762 100644 --- a/references/similarity/loss.py +++ b/references/similarity/loss.py @@ -77,7 +77,7 @@ def batch_all_triplet_loss(labels, embeddings, margin, p): def _get_triplet_mask(labels): # Check that i, j and k are distinct - indices_equal = torch.eye(labels.size(0), dtype=torch.uint8, device=labels.device) + indices_equal = torch.eye(labels.size(0), dtype=torch.bool, device=labels.device) indices_not_equal = ~indices_equal i_not_equal_j = indices_not_equal.unsqueeze(2) i_not_equal_k = indices_not_equal.unsqueeze(1) @@ -96,7 +96,7 @@ def _get_triplet_mask(labels): def _get_anchor_positive_triplet_mask(labels): # Check that i and j are distinct - indices_equal = torch.eye(labels.size(0), dtype=torch.uint8, device=labels.device) + indices_equal = torch.eye(labels.size(0), dtype=torch.bool, device=labels.device) indices_not_equal = ~indices_equal # Check if labels[i] == labels[j] diff --git a/references/similarity/model.py b/references/similarity/model.py index 797ad41a48b..3b39c0ec0dc 100644 --- a/references/similarity/model.py +++ b/references/similarity/model.py @@ -1,4 +1,3 @@ -import torch import torch.nn as nn import torchvision.models as models diff --git a/references/similarity/test.py b/references/similarity/test.py index a1e646111c8..8381e02e740 100644 --- a/references/similarity/test.py +++ b/references/similarity/test.py @@ -27,15 +27,15 @@ def test_pksampler(self): for _, labels in loader: bins = defaultdict(int) - for l in labels.tolist(): - bins[l] += 1 + for label in labels.tolist(): + bins[label] += 1 # Ensure that each batch has samples from exactly p classes self.assertEqual(len(bins), p) # Ensure that there are k samples from each class - for l in bins: - self.assertEqual(bins[l], k) + for b in bins: + self.assertEqual(bins[b], k) if __name__ == '__main__': diff --git a/references/video_classification/README.md b/references/video_classification/README.md new file mode 100644 index 00000000000..ef7db6dcd90 --- /dev/null +++ b/references/video_classification/README.md @@ -0,0 +1,34 @@ +# Video Classification + +We present a simple training script that can be used for replicating the result of [resenet-based video models](https://research.fb.com/wp-content/uploads/2018/04/a-closer-look-at-spatiotemporal-convolutions-for-action-recognition.pdf). All models are trained on [Kinetics400 dataset](https://deepmind.com/research/open-source/kinetics), a benchmark dataset for human-action recognition. The accuracy is reported on the traditional validation split. + +## Data preparation + +If you already have downloaded [Kinetics400 dataset](https://deepmind.com/research/open-source/kinetics), +please proceed directly to the next section. + +To download videos, one can use https://github.com/Showmax/kinetics-downloader. Please note that the dataset can take up upwards of 400GB, depending on the quality setting during download. + +## Training + +We assume the training and validation AVI videos are stored at `/data/kinectics400/train` and +`/data/kinectics400/val`. For training we suggest starting with the hyperparameters reported in the [paper](https://research.fb.com/wp-content/uploads/2018/04/a-closer-look-at-spatiotemporal-convolutions-for-action-recognition.pdf), in order to match the performance of said models. Clip sampling strategy is a particularly important parameter during training, and we suggest using random temporal jittering during training - in other words sampling multiple training clips from each video with random start times during at every epoch. This functionality is built into our training script, and optimal hyperparameters are set by default. + +### Multiple GPUs + +Run the training on a single node with 8 GPUs: +```bash +python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py --data-path=/data/kinectics400 --train-dir=train --val-dir=val --batch-size=16 --cache-dataset --sync-bn --apex +``` + +**Note:** all our models were trained on 8 nodes with 8 V100 GPUs each for a total of 64 GPUs. Expected training time for 64 GPUs is 24 hours, depending on the storage solution. +**Note 2:** hyperparameters for exact replication of our training can be found [here](https://github.com/pytorch/vision/blob/master/torchvision/models/video/README.md). Some hyperparameters such as learning rate are scaled linearly in proportion to the number of GPUs. + +### Single GPU + +**Note:** training on a single gpu can be extremely slow. + + +```bash +python train.py --data-path=/data/kinectics400 --train-dir=train --val-dir=val --batch-size=8 --cache-dataset +``` diff --git a/references/video_classification/presets.py b/references/video_classification/presets.py new file mode 100644 index 00000000000..3ee679ad5af --- /dev/null +++ b/references/video_classification/presets.py @@ -0,0 +1,40 @@ +import torch + +from torchvision.transforms import transforms +from transforms import ConvertBHWCtoBCHW, ConvertBCHWtoCBHW + + +class VideoClassificationPresetTrain: + def __init__(self, resize_size, crop_size, mean=(0.43216, 0.394666, 0.37645), std=(0.22803, 0.22145, 0.216989), + hflip_prob=0.5): + trans = [ + ConvertBHWCtoBCHW(), + transforms.ConvertImageDtype(torch.float32), + transforms.Resize(resize_size), + ] + if hflip_prob > 0: + trans.append(transforms.RandomHorizontalFlip(hflip_prob)) + trans.extend([ + transforms.Normalize(mean=mean, std=std), + transforms.RandomCrop(crop_size), + ConvertBCHWtoCBHW() + ]) + self.transforms = transforms.Compose(trans) + + def __call__(self, x): + return self.transforms(x) + + +class VideoClassificationPresetEval: + def __init__(self, resize_size, crop_size, mean=(0.43216, 0.394666, 0.37645), std=(0.22803, 0.22145, 0.216989)): + self.transforms = transforms.Compose([ + ConvertBHWCtoBCHW(), + transforms.ConvertImageDtype(torch.float32), + transforms.Resize(resize_size), + transforms.Normalize(mean=mean, std=std), + transforms.CenterCrop(crop_size), + ConvertBCHWtoCBHW() + ]) + + def __call__(self, x): + return self.transforms(x) diff --git a/references/video_classification/train.py b/references/video_classification/train.py index 8e41f9ec474..bcc74064344 100644 --- a/references/video_classification/train.py +++ b/references/video_classification/train.py @@ -1,22 +1,18 @@ -from __future__ import print_function import datetime import os import time -import sys - import torch import torch.utils.data from torch.utils.data.dataloader import default_collate from torch import nn import torchvision import torchvision.datasets.video_utils -from torchvision import transforms from torchvision.datasets.samplers import DistributedSampler, UniformClipSampler, RandomClipSampler +import presets import utils from scheduler import WarmupMultiStepLR -import transforms as T try: from apex import amp @@ -95,12 +91,9 @@ def collate_fn(batch): def main(args): - if args.apex: - if sys.version_info < (3, 0): - raise RuntimeError("Apex currently only supports Python 3. Aborting.") - if amp is None: - raise RuntimeError("Failed to import apex. Please install apex from https://www.github.com/nvidia/apex " - "to enable mixed-precision training.") + if args.apex and amp is None: + raise RuntimeError("Failed to import apex. Please install apex from https://www.github.com/nvidia/apex " + "to enable mixed-precision training.") if args.output_dir: utils.mkdir(args.output_dir) @@ -116,21 +109,13 @@ def main(args): # Data loading code print("Loading data") - traindir = os.path.join(args.data_path, 'train_avi-480p') - valdir = os.path.join(args.data_path, 'val_avi-480p') - normalize = T.Normalize(mean=[0.43216, 0.394666, 0.37645], - std=[0.22803, 0.22145, 0.216989]) + traindir = os.path.join(args.data_path, args.train_dir) + valdir = os.path.join(args.data_path, args.val_dir) print("Loading training data") st = time.time() cache_path = _get_cache_path(traindir) - transform_train = torchvision.transforms.Compose([ - T.ToFloatTensorInZeroOne(), - T.Resize((128, 171)), - T.RandomHorizontalFlip(), - normalize, - T.RandomCrop((112, 112)) - ]) + transform_train = presets.VideoClassificationPresetTrain((128, 171), (112, 112)) if args.cache_dataset and os.path.exists(cache_path): print("Loading dataset_train from {}".format(cache_path)) @@ -145,7 +130,8 @@ def main(args): frames_per_clip=args.clip_len, step_between_clips=1, transform=transform_train, - frame_rate=15 + frame_rate=15, + extensions=('avi', 'mp4', ) ) if args.cache_dataset: print("Saving dataset_train to {}".format(cache_path)) @@ -157,12 +143,7 @@ def main(args): print("Loading validation data") cache_path = _get_cache_path(valdir) - transform_test = torchvision.transforms.Compose([ - T.ToFloatTensorInZeroOne(), - T.Resize((128, 171)), - normalize, - T.CenterCrop((112, 112)) - ]) + transform_test = presets.VideoClassificationPresetEval((128, 171), (112, 112)) if args.cache_dataset and os.path.exists(cache_path): print("Loading dataset_test from {}".format(cache_path)) @@ -177,7 +158,8 @@ def main(args): frames_per_clip=args.clip_len, step_between_clips=1, transform=transform_test, - frame_rate=15 + frame_rate=15, + extensions=('avi', 'mp4',) ) if args.cache_dataset: print("Saving dataset_test to {}".format(cache_path)) @@ -271,9 +253,11 @@ def main(args): def parse_args(): import argparse - parser = argparse.ArgumentParser(description='PyTorch Classification Training') + parser = argparse.ArgumentParser(description='PyTorch Video Classification Training') parser.add_argument('--data-path', default='/datasets01_101/kinetics/070618/', help='dataset') + parser.add_argument('--train-dir', default='train_avi-480p', help='name of train dir') + parser.add_argument('--val-dir', default='val_avi-480p', help='name of val dir') parser.add_argument('--model', default='r2plus1d_18', help='model') parser.add_argument('--device', default='cuda', help='device') parser.add_argument('--clip-len', default=16, type=int, metavar='N', diff --git a/references/video_classification/transforms.py b/references/video_classification/transforms.py index 9435450c4b3..27f6c75450a 100644 --- a/references/video_classification/transforms.py +++ b/references/video_classification/transforms.py @@ -1,122 +1,18 @@ import torch -import random +import torch.nn as nn -def crop(vid, i, j, h, w): - return vid[..., i:(i + h), j:(j + w)] +class ConvertBHWCtoBCHW(nn.Module): + """Convert tensor from (B, H, W, C) to (B, C, H, W) + """ + def forward(self, vid: torch.Tensor) -> torch.Tensor: + return vid.permute(0, 3, 1, 2) -def center_crop(vid, output_size): - h, w = vid.shape[-2:] - th, tw = output_size - i = int(round((h - th) / 2.)) - j = int(round((w - tw) / 2.)) - return crop(vid, i, j, th, tw) +class ConvertBCHWtoCBHW(nn.Module): + """Convert tensor from (B, C, H, W) to (C, B, H, W) + """ - -def hflip(vid): - return vid.flip(dims=(-1,)) - - -# NOTE: for those functions, which generally expect mini-batches, we keep them -# as non-minibatch so that they are applied as if they were 4d (thus image). -# this way, we only apply the transformation in the spatial domain -def resize(vid, size, interpolation='bilinear'): - # NOTE: using bilinear interpolation because we don't work on minibatches - # at this level - scale = None - if isinstance(size, int): - scale = float(size) / min(vid.shape[-2:]) - size = None - return torch.nn.functional.interpolate( - vid, size=size, scale_factor=scale, mode=interpolation, align_corners=False) - - -def pad(vid, padding, fill=0, padding_mode="constant"): - # NOTE: don't want to pad on temporal dimension, so let as non-batch - # (4d) before padding. This works as expected - return torch.nn.functional.pad(vid, padding, value=fill, mode=padding_mode) - - -def to_normalized_float_tensor(vid): - return vid.permute(3, 0, 1, 2).to(torch.float32) / 255 - - -def normalize(vid, mean, std): - shape = (-1,) + (1,) * (vid.dim() - 1) - mean = torch.as_tensor(mean).reshape(shape) - std = torch.as_tensor(std).reshape(shape) - return (vid - mean) / std - - -# Class interface - -class RandomCrop(object): - def __init__(self, size): - self.size = size - - @staticmethod - def get_params(vid, output_size): - """Get parameters for ``crop`` for a random crop. - """ - h, w = vid.shape[-2:] - th, tw = output_size - if w == tw and h == th: - return 0, 0, h, w - i = random.randint(0, h - th) - j = random.randint(0, w - tw) - return i, j, th, tw - - def __call__(self, vid): - i, j, h, w = self.get_params(vid, self.size) - return crop(vid, i, j, h, w) - - -class CenterCrop(object): - def __init__(self, size): - self.size = size - - def __call__(self, vid): - return center_crop(vid, self.size) - - -class Resize(object): - def __init__(self, size): - self.size = size - - def __call__(self, vid): - return resize(vid, self.size) - - -class ToFloatTensorInZeroOne(object): - def __call__(self, vid): - return to_normalized_float_tensor(vid) - - -class Normalize(object): - def __init__(self, mean, std): - self.mean = mean - self.std = std - - def __call__(self, vid): - return normalize(vid, self.mean, self.std) - - -class RandomHorizontalFlip(object): - def __init__(self, p=0.5): - self.p = p - - def __call__(self, vid): - if random.random() < self.p: - return hflip(vid) - return vid - - -class Pad(object): - def __init__(self, padding, fill=0): - self.padding = padding - self.fill = fill - - def __call__(self, vid): - return pad(vid, self.padding, self.fill) + def forward(self, vid: torch.Tensor) -> torch.Tensor: + return vid.permute(1, 0, 2, 3) diff --git a/references/video_classification/utils.py b/references/video_classification/utils.py index 5ea6dfef341..3573b84d780 100644 --- a/references/video_classification/utils.py +++ b/references/video_classification/utils.py @@ -1,4 +1,3 @@ -from __future__ import print_function from collections import defaultdict, deque import datetime import time diff --git a/setup.cfg b/setup.cfg index 5b77b5fbce3..fd3b74c47de 100644 --- a/setup.cfg +++ b/setup.cfg @@ -9,5 +9,8 @@ max-line-length = 120 [flake8] max-line-length = 120 -ignore = F401,E402,F403,W503,W504 +ignore = F401,E402,F403,W503,W504,F821 exclude = venv + +[pydocstyle] +select = D417 # Missing argument descriptions in the docstring diff --git a/setup.py b/setup.py index 8ece63ce739..ff4c48d4cbb 100644 --- a/setup.py +++ b/setup.py @@ -1,10 +1,8 @@ -from __future__ import print_function import os import io -import re import sys from setuptools import setup, find_packages -from pkg_resources import get_distribution, DistributionNotFound +from pkg_resources import parse_version, get_distribution, DistributionNotFound import subprocess import distutils.command.clean import distutils.spawn @@ -13,6 +11,7 @@ import torch from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDAExtension, CUDA_HOME +from torch.utils.hipify import hipify_python def read(*names, **kwargs): @@ -30,12 +29,14 @@ def get_dist(pkgname): return None -version = '0.5.0a0' +cwd = os.path.dirname(os.path.abspath(__file__)) + +version_txt = os.path.join(cwd, 'version.txt') +with open(version_txt, 'r') as f: + version = f.readline().strip() sha = 'Unknown' package_name = 'torchvision' -cwd = os.path.dirname(os.path.abspath(__file__)) - try: sha = subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=cwd).decode('ascii').strip() except Exception: @@ -45,7 +46,6 @@ def get_dist(pkgname): version = os.getenv('BUILD_VERSION') elif sha != 'Unknown': version += '+' + sha[:7] -print("Building wheel {}-{}".format(package_name, version)) def write_version_file(): @@ -58,32 +58,114 @@ def write_version_file(): f.write(" cuda = _check_cuda_version()\n") -write_version_file() - -readme = open('README.rst').read() - pytorch_dep = 'torch' if os.getenv('PYTORCH_VERSION'): pytorch_dep += "==" + os.getenv('PYTORCH_VERSION') requirements = [ 'numpy', - 'six', pytorch_dep, ] -pillow_ver = ' >= 4.1.1' +pillow_ver = ' >= 5.3.0' pillow_req = 'pillow-simd' if get_dist('pillow-simd') is not None else 'pillow' requirements.append(pillow_req + pillow_ver) +def find_library(name, vision_include): + this_dir = os.path.dirname(os.path.abspath(__file__)) + build_prefix = os.environ.get('BUILD_PREFIX', None) + is_conda_build = build_prefix is not None + + library_found = False + conda_installed = False + lib_folder = None + include_folder = None + library_header = '{0}.h'.format(name) + + # Lookup in TORCHVISION_INCLUDE or in the package file + package_path = [os.path.join(this_dir, 'torchvision')] + for folder in vision_include + package_path: + candidate_path = os.path.join(folder, library_header) + library_found = os.path.exists(candidate_path) + if library_found: + break + + if not library_found: + print('Running build on conda-build: {0}'.format(is_conda_build)) + if is_conda_build: + # Add conda headers/libraries + if os.name == 'nt': + build_prefix = os.path.join(build_prefix, 'Library') + include_folder = os.path.join(build_prefix, 'include') + lib_folder = os.path.join(build_prefix, 'lib') + library_header_path = os.path.join( + include_folder, library_header) + library_found = os.path.isfile(library_header_path) + conda_installed = library_found + else: + # Check if using Anaconda to produce wheels + conda = distutils.spawn.find_executable('conda') + is_conda = conda is not None + print('Running build on conda: {0}'.format(is_conda)) + if is_conda: + python_executable = sys.executable + py_folder = os.path.dirname(python_executable) + if os.name == 'nt': + env_path = os.path.join(py_folder, 'Library') + else: + env_path = os.path.dirname(py_folder) + lib_folder = os.path.join(env_path, 'lib') + include_folder = os.path.join(env_path, 'include') + library_header_path = os.path.join( + include_folder, library_header) + library_found = os.path.isfile(library_header_path) + conda_installed = library_found + + if not library_found: + if sys.platform == 'linux': + library_found = os.path.exists('/usr/include/{0}'.format( + library_header)) + library_found = library_found or os.path.exists( + '/usr/local/include/{0}'.format(library_header)) + + return library_found, conda_installed, include_folder, lib_folder + + def get_extensions(): this_dir = os.path.dirname(os.path.abspath(__file__)) extensions_dir = os.path.join(this_dir, 'torchvision', 'csrc') - main_file = glob.glob(os.path.join(extensions_dir, '*.cpp')) - source_cpu = glob.glob(os.path.join(extensions_dir, 'cpu', '*.cpp')) - source_cuda = glob.glob(os.path.join(extensions_dir, 'cuda', '*.cu')) + main_file = glob.glob(os.path.join(extensions_dir, '*.cpp')) + glob.glob(os.path.join(extensions_dir, 'ops', + '*.cpp')) + source_cpu = ( + glob.glob(os.path.join(extensions_dir, 'ops', 'autograd', '*.cpp')) + + glob.glob(os.path.join(extensions_dir, 'ops', 'cpu', '*.cpp')) + + glob.glob(os.path.join(extensions_dir, 'ops', 'quantized', 'cpu', '*.cpp')) + ) + + is_rocm_pytorch = False + if torch.__version__ >= '1.5': + from torch.utils.cpp_extension import ROCM_HOME + is_rocm_pytorch = True if ((torch.version.hip is not None) and (ROCM_HOME is not None)) else False + + if is_rocm_pytorch: + hipify_python.hipify( + project_directory=this_dir, + output_directory=this_dir, + includes="torchvision/csrc/ops/cuda/*", + show_detailed=True, + is_pytorch_extension=True, + ) + source_cuda = glob.glob(os.path.join(extensions_dir, 'ops', 'hip', '*.hip')) + # Copy over additional files + for file in glob.glob(r"torchvision/csrc/ops/cuda/*.h"): + shutil.copy(file, "torchvision/csrc/ops/hip") + + else: + source_cuda = glob.glob(os.path.join(extensions_dir, 'ops', 'cuda', '*.cu')) + + source_cuda += glob.glob(os.path.join(extensions_dir, 'ops', 'autocast', '*.cpp')) sources = main_file + source_cpu extension = CppExtension @@ -102,46 +184,50 @@ def get_extensions(): define_macros = [] - extra_compile_args = {} - if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv('FORCE_CUDA', '0') == '1': + extra_compile_args = {'cxx': []} + if (torch.cuda.is_available() and ((CUDA_HOME is not None) or is_rocm_pytorch)) \ + or os.getenv('FORCE_CUDA', '0') == '1': extension = CUDAExtension sources += source_cuda - define_macros += [('WITH_CUDA', None)] - nvcc_flags = os.getenv('NVCC_FLAGS', '') - if nvcc_flags == '': - nvcc_flags = [] + if not is_rocm_pytorch: + define_macros += [('WITH_CUDA', None)] + nvcc_flags = os.getenv('NVCC_FLAGS', '') + if nvcc_flags == '': + nvcc_flags = [] + else: + nvcc_flags = nvcc_flags.split(' ') else: - nvcc_flags = nvcc_flags.split(' ') - extra_compile_args = { - 'cxx': ['-O0'], - 'nvcc': nvcc_flags, - } + define_macros += [('WITH_HIP', None)] + nvcc_flags = [] + extra_compile_args["nvcc"] = nvcc_flags if sys.platform == 'win32': define_macros += [('torchvision_EXPORTS', None)] - extra_compile_args.setdefault('cxx', []) extra_compile_args['cxx'].append('/MP') + debug_mode = os.getenv('DEBUG', '0') == '1' + if debug_mode: + print("Compile in debug mode") + extra_compile_args['cxx'].append("-g") + extra_compile_args['cxx'].append("-O0") + if "nvcc" in extra_compile_args: + # we have to remove "-OX" and "-g" flag if exists and append + nvcc_flags = extra_compile_args["nvcc"] + extra_compile_args["nvcc"] = [ + f for f in nvcc_flags if not ("-O" in f or "-g" in f) + ] + extra_compile_args["nvcc"].append("-O0") + extra_compile_args["nvcc"].append("-g") + sources = [os.path.join(extensions_dir, s) for s in sources] include_dirs = [extensions_dir] - ffmpeg_exe = distutils.spawn.find_executable('ffmpeg') - has_ffmpeg = ffmpeg_exe is not None - if has_ffmpeg: - ffmpeg_bin = os.path.dirname(ffmpeg_exe) - ffmpeg_root = os.path.dirname(ffmpeg_bin) - ffmpeg_include_dir = os.path.join(ffmpeg_root, 'include') - - # TorchVision video reader - video_reader_src_dir = os.path.join(this_dir, 'torchvision', 'csrc', 'cpu', 'video_reader') - video_reader_src = glob.glob(os.path.join(video_reader_src_dir, "*.cpp")) - ext_modules = [ extension( 'torchvision._C', - sources, + sorted(sources), include_dirs=include_dirs, define_macros=define_macros, extra_compile_args=extra_compile_args, @@ -157,16 +243,172 @@ def get_extensions(): extra_compile_args=extra_compile_args, ) ) + + # ------------------- Torchvision extra extensions ------------------------ + vision_include = os.environ.get('TORCHVISION_INCLUDE', None) + vision_library = os.environ.get('TORCHVISION_LIBRARY', None) + vision_include = (vision_include.split(os.pathsep) + if vision_include is not None else []) + vision_library = (vision_library.split(os.pathsep) + if vision_library is not None else []) + include_dirs += vision_include + library_dirs = vision_library + + # Image reading extension + image_macros = [] + image_include = [extensions_dir] + image_library = [] + image_link_flags = [] + + # Locating libPNG + libpng = distutils.spawn.find_executable('libpng-config') + pngfix = distutils.spawn.find_executable('pngfix') + png_found = libpng is not None or pngfix is not None + print('PNG found: {0}'.format(png_found)) + if png_found: + if libpng is not None: + # Linux / Mac + png_version = subprocess.run([libpng, '--version'], + stdout=subprocess.PIPE) + png_version = png_version.stdout.strip().decode('utf-8') + print('libpng version: {0}'.format(png_version)) + png_version = parse_version(png_version) + if png_version >= parse_version("1.6.0"): + print('Building torchvision with PNG image support') + png_lib = subprocess.run([libpng, '--libdir'], + stdout=subprocess.PIPE) + png_lib = png_lib.stdout.strip().decode('utf-8') + if 'disabled' not in png_lib: + image_library += [png_lib] + png_include = subprocess.run([libpng, '--I_opts'], + stdout=subprocess.PIPE) + png_include = png_include.stdout.strip().decode('utf-8') + _, png_include = png_include.split('-I') + print('libpng include path: {0}'.format(png_include)) + image_include += [png_include] + image_link_flags.append('png') + else: + print('libpng installed version is less than 1.6.0, ' + 'disabling PNG support') + png_found = False + else: + # Windows + png_lib = os.path.join( + os.path.dirname(os.path.dirname(pngfix)), 'lib') + png_include = os.path.join(os.path.dirname( + os.path.dirname(pngfix)), 'include', 'libpng16') + image_library += [png_lib] + image_include += [png_include] + image_link_flags.append('libpng') + + # Locating libjpeg + (jpeg_found, jpeg_conda, + jpeg_include, jpeg_lib) = find_library('jpeglib', vision_include) + + print('JPEG found: {0}'.format(jpeg_found)) + image_macros += [('PNG_FOUND', str(int(png_found)))] + image_macros += [('JPEG_FOUND', str(int(jpeg_found)))] + if jpeg_found: + print('Building torchvision with JPEG image support') + image_link_flags.append('jpeg') + if jpeg_conda: + image_library += [jpeg_lib] + image_include += [jpeg_include] + + image_path = os.path.join(extensions_dir, 'io', 'image') + image_src = glob.glob(os.path.join(image_path, '*.cpp')) + glob.glob(os.path.join(image_path, 'cpu', '*.cpp')) + + if png_found or jpeg_found: + ext_modules.append(extension( + 'torchvision.image', + image_src, + include_dirs=image_include + include_dirs + [image_path], + library_dirs=image_library + library_dirs, + define_macros=image_macros, + libraries=image_link_flags, + extra_compile_args=extra_compile_args + )) + + ffmpeg_exe = distutils.spawn.find_executable('ffmpeg') + has_ffmpeg = ffmpeg_exe is not None + print("FFmpeg found: {}".format(has_ffmpeg)) + if has_ffmpeg: + ffmpeg_libraries = { + 'libavcodec', + 'libavformat', + 'libavutil', + 'libswresample', + 'libswscale' + } + + ffmpeg_bin = os.path.dirname(ffmpeg_exe) + ffmpeg_root = os.path.dirname(ffmpeg_bin) + ffmpeg_include_dir = os.path.join(ffmpeg_root, 'include') + ffmpeg_library_dir = os.path.join(ffmpeg_root, 'lib') + + gcc = distutils.spawn.find_executable('gcc') + platform_tag = subprocess.run( + [gcc, '-print-multiarch'], stdout=subprocess.PIPE) + platform_tag = platform_tag.stdout.strip().decode('utf-8') + + if platform_tag: + # Most probably a Debian-based distribution + ffmpeg_include_dir = [ + ffmpeg_include_dir, + os.path.join(ffmpeg_include_dir, platform_tag) + ] + ffmpeg_library_dir = [ + ffmpeg_library_dir, + os.path.join(ffmpeg_library_dir, platform_tag) + ] + else: + ffmpeg_include_dir = [ffmpeg_include_dir] + ffmpeg_library_dir = [ffmpeg_library_dir] + + has_ffmpeg = True + for library in ffmpeg_libraries: + library_found = False + for search_path in ffmpeg_include_dir + include_dirs: + full_path = os.path.join(search_path, library, '*.h') + library_found |= len(glob.glob(full_path)) > 0 + + if not library_found: + print('{0} header files were not found, disabling ffmpeg ' + 'support') + has_ffmpeg = False + + if has_ffmpeg: + print("ffmpeg include path: {}".format(ffmpeg_include_dir)) + print("ffmpeg library_dir: {}".format(ffmpeg_library_dir)) + + # TorchVision base decoder + video reader + video_reader_src_dir = os.path.join(this_dir, 'torchvision', 'csrc', 'io', 'video_reader') + video_reader_src = glob.glob(os.path.join(video_reader_src_dir, "*.cpp")) + base_decoder_src_dir = os.path.join(this_dir, 'torchvision', 'csrc', 'io', 'decoder') + base_decoder_src = glob.glob( + os.path.join(base_decoder_src_dir, "*.cpp")) + # Torchvision video API + videoapi_src_dir = os.path.join(this_dir, 'torchvision', 'csrc', 'io', 'video') + videoapi_src = glob.glob(os.path.join(videoapi_src_dir, "*.cpp")) + # exclude tests + base_decoder_src = [x for x in base_decoder_src if '_test.cpp' not in x] + + combined_src = video_reader_src + base_decoder_src + videoapi_src + ext_modules.append( CppExtension( 'torchvision.video_reader', - video_reader_src, + combined_src, include_dirs=[ + base_decoder_src_dir, video_reader_src_dir, - ffmpeg_include_dir, + videoapi_src_dir, extensions_dir, + *ffmpeg_include_dir, + *include_dirs ], + library_dirs=ffmpeg_library_dir + library_dirs, libraries=[ 'avcodec', 'avformat', @@ -174,8 +416,8 @@ def get_extensions(): 'swresample', 'swscale', ], - extra_compile_args=["-std=c++14"], - extra_link_args=["-std=c++14"], + extra_compile_args=["-std=c++14"] if os.name != 'nt' else ['/std:c++14', '/MP'], + extra_link_args=["-std=c++14" if os.name != 'nt' else '/std:c++14'], ) ) @@ -197,28 +439,38 @@ def run(self): distutils.command.clean.clean.run(self) -setup( - # Metadata - name=package_name, - version=version, - author='PyTorch Core Team', - author_email='soumith@pytorch.org', - url='https://github.com/pytorch/vision', - description='image and video datasets and models for torch deep learning', - long_description=readme, - license='BSD', - - # Package info - packages=find_packages(exclude=('test',)), - - zip_safe=False, - install_requires=requirements, - extras_require={ - "scipy": ["scipy"], - }, - ext_modules=get_extensions(), - cmdclass={ - 'build_ext': BuildExtension.with_options(no_python_abi_suffix=True), - 'clean': clean, - } -) +if __name__ == "__main__": + print("Building wheel {}-{}".format(package_name, version)) + + write_version_file() + + with open('README.rst') as f: + readme = f.read() + + setup( + # Metadata + name=package_name, + version=version, + author='PyTorch Core Team', + author_email='soumith@pytorch.org', + url='https://github.com/pytorch/vision', + description='image and video datasets and models for torch deep learning', + long_description=readme, + license='BSD', + + # Package info + packages=find_packages(exclude=('test',)), + package_data={ + package_name: ['*.dll', '*.dylib', '*.so'] + }, + zip_safe=False, + install_requires=requirements, + extras_require={ + "scipy": ["scipy"], + }, + ext_modules=get_extensions(), + cmdclass={ + 'build_ext': BuildExtension.with_options(no_python_abi_suffix=True), + 'clean': clean, + } + ) diff --git a/test/_utils_internal.py b/test/_utils_internal.py new file mode 100644 index 00000000000..1a32e6f2b25 --- /dev/null +++ b/test/_utils_internal.py @@ -0,0 +1,7 @@ +import os + + +# Get relative file path +# this returns relative path from current file. +def get_relative_path(curr_file, *path_components): + return os.path.join(os.path.dirname(curr_file), *path_components) diff --git a/test/assets/damaged_jpeg/TensorFlow-LICENSE b/test/assets/damaged_jpeg/TensorFlow-LICENSE new file mode 100644 index 00000000000..c7563fe4e5b --- /dev/null +++ b/test/assets/damaged_jpeg/TensorFlow-LICENSE @@ -0,0 +1,13 @@ + Copyright 2019 The TensorFlow Authors. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/test/assets/damaged_jpeg/bad_huffman.jpg b/test/assets/damaged_jpeg/bad_huffman.jpg new file mode 100644 index 00000000000..ef5b6f12c55 Binary files /dev/null and b/test/assets/damaged_jpeg/bad_huffman.jpg differ diff --git a/test/assets/damaged_jpeg/corrupt.jpg b/test/assets/damaged_jpeg/corrupt.jpg new file mode 100644 index 00000000000..5e2fe6c56f5 Binary files /dev/null and b/test/assets/damaged_jpeg/corrupt.jpg differ diff --git a/test/assets/damaged_jpeg/corrupt34_2.jpg b/test/assets/damaged_jpeg/corrupt34_2.jpg new file mode 100644 index 00000000000..4211155c455 Binary files /dev/null and b/test/assets/damaged_jpeg/corrupt34_2.jpg differ diff --git a/test/assets/damaged_jpeg/corrupt34_3.jpg b/test/assets/damaged_jpeg/corrupt34_3.jpg new file mode 100644 index 00000000000..c1c2a9d1e1e Binary files /dev/null and b/test/assets/damaged_jpeg/corrupt34_3.jpg differ diff --git a/test/assets/damaged_jpeg/corrupt34_4.jpg b/test/assets/damaged_jpeg/corrupt34_4.jpg new file mode 100644 index 00000000000..b8e7308ba00 Binary files /dev/null and b/test/assets/damaged_jpeg/corrupt34_4.jpg differ diff --git a/test/assets/grace_hopper_517x606.jpg b/test/assets/encode_jpeg/grace_hopper_517x606.jpg similarity index 100% rename from test/assets/grace_hopper_517x606.jpg rename to test/assets/encode_jpeg/grace_hopper_517x606.jpg diff --git a/test/assets/encode_jpeg/jpeg_write/grace_hopper_517x606_pil.jpg b/test/assets/encode_jpeg/jpeg_write/grace_hopper_517x606_pil.jpg new file mode 100644 index 00000000000..0f37ea0d9e1 Binary files /dev/null and b/test/assets/encode_jpeg/jpeg_write/grace_hopper_517x606_pil.jpg differ diff --git a/test/assets/fakedata/draw_boxes_util.png b/test/assets/fakedata/draw_boxes_util.png new file mode 100644 index 00000000000..2c361c5fafd Binary files /dev/null and b/test/assets/fakedata/draw_boxes_util.png differ diff --git a/test/assets/fakedata/draw_boxes_vanilla.png b/test/assets/fakedata/draw_boxes_vanilla.png new file mode 100644 index 00000000000..bbc7112deb0 Binary files /dev/null and b/test/assets/fakedata/draw_boxes_vanilla.png differ diff --git a/test/assets/fakedata/draw_segm_masks_colors_util.png b/test/assets/fakedata/draw_segm_masks_colors_util.png new file mode 100644 index 00000000000..454b3555631 Binary files /dev/null and b/test/assets/fakedata/draw_segm_masks_colors_util.png differ diff --git a/test/assets/fakedata/draw_segm_masks_no_colors_util.png b/test/assets/fakedata/draw_segm_masks_no_colors_util.png new file mode 100644 index 00000000000..f048d2469d2 Binary files /dev/null and b/test/assets/fakedata/draw_segm_masks_no_colors_util.png differ diff --git a/test/assets/fakedata/logos/cmyk_pytorch.jpg b/test/assets/fakedata/logos/cmyk_pytorch.jpg new file mode 100644 index 00000000000..16ee8b2b4bc Binary files /dev/null and b/test/assets/fakedata/logos/cmyk_pytorch.jpg differ diff --git a/test/assets/fakedata/logos/gray_pytorch.jpg b/test/assets/fakedata/logos/gray_pytorch.jpg new file mode 100644 index 00000000000..60c9c7cf705 Binary files /dev/null and b/test/assets/fakedata/logos/gray_pytorch.jpg differ diff --git a/test/assets/fakedata/logos/gray_pytorch.png b/test/assets/fakedata/logos/gray_pytorch.png new file mode 100644 index 00000000000..412b931299e Binary files /dev/null and b/test/assets/fakedata/logos/gray_pytorch.png differ diff --git a/test/assets/fakedata/logos/grayalpha_pytorch.png b/test/assets/fakedata/logos/grayalpha_pytorch.png new file mode 100644 index 00000000000..3e77d72b904 Binary files /dev/null and b/test/assets/fakedata/logos/grayalpha_pytorch.png differ diff --git a/test/assets/fakedata/logos/palette_pytorch.png b/test/assets/fakedata/logos/palette_pytorch.png new file mode 100644 index 00000000000..2108d1b315a Binary files /dev/null and b/test/assets/fakedata/logos/palette_pytorch.png differ diff --git a/test/assets/fakedata/logos/rgb_pytorch.jpg b/test/assets/fakedata/logos/rgb_pytorch.jpg new file mode 100644 index 00000000000..d49e658b94f Binary files /dev/null and b/test/assets/fakedata/logos/rgb_pytorch.jpg differ diff --git a/test/assets/fakedata/logos/rgb_pytorch.png b/test/assets/fakedata/logos/rgb_pytorch.png new file mode 100644 index 00000000000..c9d08e6c7da Binary files /dev/null and b/test/assets/fakedata/logos/rgb_pytorch.png differ diff --git a/test/assets/fakedata/logos/rgbalpha_pytorch.png b/test/assets/fakedata/logos/rgbalpha_pytorch.png new file mode 100644 index 00000000000..5a9ff14ba5e Binary files /dev/null and b/test/assets/fakedata/logos/rgbalpha_pytorch.png differ diff --git a/test/assets/gaussian_blur_opencv_results.pt b/test/assets/gaussian_blur_opencv_results.pt new file mode 100644 index 00000000000..d68f477fb44 Binary files /dev/null and b/test/assets/gaussian_blur_opencv_results.pt differ diff --git a/test/assets/videos/hmdb51_Turnk_r_Pippi_Michel_cartwheel_f_cm_np2_le_med_6.avi b/test/assets/videos/hmdb51_Turnk_r_Pippi_Michel_cartwheel_f_cm_np2_le_med_6.avi new file mode 100644 index 00000000000..979cd3901af Binary files /dev/null and b/test/assets/videos/hmdb51_Turnk_r_Pippi_Michel_cartwheel_f_cm_np2_le_med_6.avi differ diff --git a/test/common_utils.py b/test/common_utils.py index 9c0c3175ef1..7e16864d56c 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -5,9 +5,24 @@ import unittest import argparse import sys +import io import torch -import errno +import warnings import __main__ +import random +import inspect + +from numbers import Number +from torch._six import string_classes +from collections import OrderedDict +from _utils_internal import get_relative_path + +import numpy as np +from PIL import Image + +IS_PY39 = sys.version_info.major == 3 and sys.version_info.minor == 9 +PY39_SEGFAULT_SKIP_MSG = "Segmentation fault with Python 3.9, see https://github.com/pytorch/vision/issues/3367" +PY39_SKIP = unittest.skipIf(IS_PY39, PY39_SEGFAULT_SKIP_MSG) @contextlib.contextmanager @@ -22,7 +37,15 @@ def get_tmp_dir(src=None, **kwargs): shutil.rmtree(tmp_dir) +def set_rng_seed(seed): + torch.manual_seed(seed) + random.seed(seed) + np.random.seed(seed) + + ACCEPT = os.getenv('EXPECTTEST_ACCEPT') +TEST_WITH_SLOW = os.getenv('PYTORCH_TEST_WITH_SLOW', '0') == '1' + parser = argparse.ArgumentParser(add_help=False) parser.add_argument('--accept', action='store_true') @@ -64,90 +87,335 @@ def map_nested_tensor_object(object, tensor_map_fn): return impl(object) +def is_iterable(obj): + try: + iter(obj) + return True + except TypeError: + return False + + # adapted from TestCase in torch/test/common_utils to accept non-string # inputs and set maximum binary size class TestCase(unittest.TestCase): - def assertExpected(self, output, subname=None, rtol=None, atol=None): - r""" - Test that a python value matches the recorded contents of a file - derived from the name of this test and subname. The value must be - pickable with `torch.save`. This file - is placed in the 'expect' directory in the same directory - as the test script. You can automatically update the recorded test - output using --accept. + precision = 1e-5 - If you call this multiple times in a single function, you must - give a unique subname each time. - """ - def remove_prefix(text, prefix): + def _get_expected_file(self, subname=None, strip_suffix=None): + def remove_prefix_suffix(text, prefix, suffix): if text.startswith(prefix): - return text[len(prefix):] + text = text[len(prefix):] + if suffix is not None and text.endswith(suffix): + text = text[:len(text) - len(suffix)] return text # NB: we take __file__ from the module that defined the test # class, so we place the expect directory where the test script # lives, NOT where test/common_utils.py lives. module_id = self.__class__.__module__ - munged_id = remove_prefix(self.id(), module_id + ".") - test_file = os.path.realpath(sys.modules[module_id].__file__) - expected_file = os.path.join(os.path.dirname(test_file), - "expect", - munged_id) + munged_id = remove_prefix_suffix(self.id(), module_id + ".", strip_suffix) + + # Determine expected file based on environment + expected_file_base = get_relative_path( + os.path.realpath(sys.modules[module_id].__file__), + "expect") - subname_output = "" + # Set expected_file based on subname. + expected_file = os.path.join(expected_file_base, munged_id) if subname: expected_file += "_" + subname - subname_output = " ({})".format(subname) expected_file += "_expect.pkl" - expected = None - def accept_output(update_type): - print("Accepting {} for {}{}:\n\n{}".format(update_type, munged_id, subname_output, output)) + if not ACCEPT and not os.path.exists(expected_file): + raise RuntimeError( + f"No expect file exists for {os.path.basename(expected_file)} in {expected_file}; " + "to accept the current output, run:\n" + f"python {__main__.__file__} {munged_id} --accept") + + return expected_file + + def assertExpected(self, output, subname=None, prec=None, strip_suffix=None): + r""" + Test that a python value matches the recorded contents of a file + derived from the name of this test and subname. The value must be + pickable with `torch.save`. This file + is placed in the 'expect' directory in the same directory + as the test script. You can automatically update the recorded test + output using --accept. + + If you call this multiple times in a single function, you must + give a unique subname each time. + + strip_suffix allows different tests that expect similar numerics, e.g. + "test_xyz_cuda" and "test_xyz_cpu", to use the same pickled data. + test_xyz_cuda would pass strip_suffix="_cuda", test_xyz_cpu would pass + strip_suffix="_cpu", and they would both use a data file name based on + "test_xyz". + """ + expected_file = self._get_expected_file(subname, strip_suffix) + + if ACCEPT: + filename = {os.path.basename(expected_file)} + print("Accepting updated output for {}:\n\n{}".format(filename, output)) torch.save(output, expected_file) MAX_PICKLE_SIZE = 50 * 1000 # 50 KB binary_size = os.path.getsize(expected_file) - self.assertTrue(binary_size <= MAX_PICKLE_SIZE) - - try: + if binary_size > MAX_PICKLE_SIZE: + raise RuntimeError("The output for {}, is larger than 50kb".format(filename)) + else: expected = torch.load(expected_file) - except IOError as e: - if e.errno != errno.ENOENT: - raise - elif ACCEPT: - return accept_output("output") - else: - raise RuntimeError( - ("I got this output for {}{}:\n\n{}\n\n" - "No expect file exists; to accept the current output, run:\n" - "python {} {} --accept").format(munged_id, subname_output, output, __main__.__file__, munged_id)) + self.assertEqual(output, expected, prec=prec) - if ACCEPT: - equal = False - try: - equal = self.assertNestedTensorObjectsEqual(output, expected, rtol=rtol, atol=atol) - except Exception: - equal = False - if not equal: - return accept_output("updated output") + def assertEqual(self, x, y, prec=None, message='', allow_inf=False): + """ + This is copied from pytorch/test/common_utils.py's TestCase.assertEqual + """ + if isinstance(prec, str) and message == '': + message = prec + prec = None + if prec is None: + prec = self.precision + + if isinstance(x, torch.Tensor) and isinstance(y, Number): + self.assertEqual(x.item(), y, prec=prec, message=message, + allow_inf=allow_inf) + elif isinstance(y, torch.Tensor) and isinstance(x, Number): + self.assertEqual(x, y.item(), prec=prec, message=message, + allow_inf=allow_inf) + elif isinstance(x, torch.Tensor) and isinstance(y, torch.Tensor): + def assertTensorsEqual(a, b): + super(TestCase, self).assertEqual(a.size(), b.size(), message) + if a.numel() > 0: + if (a.device.type == 'cpu' and (a.dtype == torch.float16 or a.dtype == torch.bfloat16)): + # CPU half and bfloat16 tensors don't have the methods we need below + a = a.to(torch.float32) + b = b.to(a) + + if (a.dtype == torch.bool) != (b.dtype == torch.bool): + raise TypeError("Was expecting both tensors to be bool type.") + else: + if a.dtype == torch.bool and b.dtype == torch.bool: + # we want to respect precision but as bool doesn't support substraction, + # boolean tensor has to be converted to int + a = a.to(torch.int) + b = b.to(torch.int) + + diff = a - b + if a.is_floating_point(): + # check that NaNs are in the same locations + nan_mask = torch.isnan(a) + self.assertTrue(torch.equal(nan_mask, torch.isnan(b)), message) + diff[nan_mask] = 0 + # inf check if allow_inf=True + if allow_inf: + inf_mask = torch.isinf(a) + inf_sign = inf_mask.sign() + self.assertTrue(torch.equal(inf_sign, torch.isinf(b).sign()), message) + diff[inf_mask] = 0 + # TODO: implement abs on CharTensor (int8) + if diff.is_signed() and diff.dtype != torch.int8: + diff = diff.abs() + max_err = diff.max() + tolerance = prec + prec * abs(a.max()) + self.assertLessEqual(max_err, tolerance, message) + super(TestCase, self).assertEqual(x.is_sparse, y.is_sparse, message) + super(TestCase, self).assertEqual(x.is_quantized, y.is_quantized, message) + if x.is_sparse: + x = self.safeCoalesce(x) + y = self.safeCoalesce(y) + assertTensorsEqual(x._indices(), y._indices()) + assertTensorsEqual(x._values(), y._values()) + elif x.is_quantized and y.is_quantized: + self.assertEqual(x.qscheme(), y.qscheme(), prec=prec, + message=message, allow_inf=allow_inf) + if x.qscheme() == torch.per_tensor_affine: + self.assertEqual(x.q_scale(), y.q_scale(), prec=prec, + message=message, allow_inf=allow_inf) + self.assertEqual(x.q_zero_point(), y.q_zero_point(), + prec=prec, message=message, + allow_inf=allow_inf) + elif x.qscheme() == torch.per_channel_affine: + self.assertEqual(x.q_per_channel_scales(), y.q_per_channel_scales(), prec=prec, + message=message, allow_inf=allow_inf) + self.assertEqual(x.q_per_channel_zero_points(), y.q_per_channel_zero_points(), + prec=prec, message=message, + allow_inf=allow_inf) + self.assertEqual(x.q_per_channel_axis(), y.q_per_channel_axis(), + prec=prec, message=message) + self.assertEqual(x.dtype, y.dtype) + self.assertEqual(x.int_repr().to(torch.int32), + y.int_repr().to(torch.int32), prec=prec, + message=message, allow_inf=allow_inf) + else: + assertTensorsEqual(x, y) + elif isinstance(x, string_classes) and isinstance(y, string_classes): + super(TestCase, self).assertEqual(x, y, message) + elif type(x) == set and type(y) == set: + super(TestCase, self).assertEqual(x, y, message) + elif isinstance(x, dict) and isinstance(y, dict): + if isinstance(x, OrderedDict) and isinstance(y, OrderedDict): + self.assertEqual(x.items(), y.items(), prec=prec, + message=message, allow_inf=allow_inf) + else: + self.assertEqual(set(x.keys()), set(y.keys()), prec=prec, + message=message, allow_inf=allow_inf) + key_list = list(x.keys()) + self.assertEqual([x[k] for k in key_list], + [y[k] for k in key_list], + prec=prec, message=message, + allow_inf=allow_inf) + elif is_iterable(x) and is_iterable(y): + super(TestCase, self).assertEqual(len(x), len(y), message) + for x_, y_ in zip(x, y): + self.assertEqual(x_, y_, prec=prec, message=message, + allow_inf=allow_inf) + elif isinstance(x, bool) and isinstance(y, bool): + super(TestCase, self).assertEqual(x, y, message) + elif isinstance(x, Number) and isinstance(y, Number): + inf = float("inf") + if abs(x) == inf or abs(y) == inf: + if allow_inf: + super(TestCase, self).assertEqual(x, y, message) + else: + self.fail("Expected finite numeric values - x={}, y={}".format(x, y)) + return + super(TestCase, self).assertLessEqual(abs(x - y), prec, message) else: - self.assertNestedTensorObjectsEqual(output, expected, rtol=rtol, atol=atol) + super(TestCase, self).assertEqual(x, y, message) - def assertNestedTensorObjectsEqual(self, a, b, rtol=None, atol=None): - self.assertEqual(type(a), type(b)) + def check_jit_scriptable(self, nn_module, args, unwrapper=None, skip=False): + """ + Check that a nn.Module's results in TorchScript match eager and that it + can be exported + """ + if not TEST_WITH_SLOW or skip: + # TorchScript is not enabled, skip these tests + msg = "The check_jit_scriptable test for {} was skipped. " \ + "This test checks if the module's results in TorchScript " \ + "match eager and that it can be exported. To run these " \ + "tests make sure you set the environment variable " \ + "PYTORCH_TEST_WITH_SLOW=1 and that the test is not " \ + "manually skipped.".format(nn_module.__class__.__name__) + warnings.warn(msg, RuntimeWarning) + return None - if isinstance(a, torch.Tensor): - torch.testing.assert_allclose(a, b, rtol=rtol, atol=atol) + sm = torch.jit.script(nn_module) - elif isinstance(a, dict): - self.assertEqual(len(a), len(b)) - for key, value in a.items(): - self.assertTrue(key in b, "key: " + str(key)) + with freeze_rng_state(): + eager_out = nn_module(*args) - self.assertNestedTensorObjectsEqual(value, b[key], rtol=rtol, atol=atol) - elif isinstance(a, (list, tuple)): - self.assertEqual(len(a), len(b)) + with freeze_rng_state(): + script_out = sm(*args) + if unwrapper: + script_out = unwrapper(script_out) - for val1, val2 in zip(a, b): - self.assertNestedTensorObjectsEqual(val1, val2, rtol=rtol, atol=atol) + self.assertEqual(eager_out, script_out, prec=1e-4) + self.assertExportImportModule(sm, args) - else: - self.assertEqual(a, b) + return sm + + def getExportImportCopy(self, m): + """ + Save and load a TorchScript model + """ + buffer = io.BytesIO() + torch.jit.save(m, buffer) + buffer.seek(0) + imported = torch.jit.load(buffer) + return imported + + def assertExportImportModule(self, m, args): + """ + Check that the results of a model are the same after saving and loading + """ + m_import = self.getExportImportCopy(m) + with freeze_rng_state(): + results = m(*args) + with freeze_rng_state(): + results_from_imported = m_import(*args) + self.assertEqual(results, results_from_imported, prec=3e-5) + + +@contextlib.contextmanager +def freeze_rng_state(): + rng_state = torch.get_rng_state() + if torch.cuda.is_available(): + cuda_rng_state = torch.cuda.get_rng_state() + yield + if torch.cuda.is_available(): + torch.cuda.set_rng_state(cuda_rng_state) + torch.set_rng_state(rng_state) + + +class TransformsTester(unittest.TestCase): + + def _create_data(self, height=3, width=3, channels=3, device="cpu"): + tensor = torch.randint(0, 256, (channels, height, width), dtype=torch.uint8, device=device) + pil_img = Image.fromarray(tensor.permute(1, 2, 0).contiguous().cpu().numpy()) + return tensor, pil_img + + def _create_data_batch(self, height=3, width=3, channels=3, num_samples=4, device="cpu"): + batch_tensor = torch.randint( + 0, 256, + (num_samples, channels, height, width), + dtype=torch.uint8, + device=device + ) + return batch_tensor + + def compareTensorToPIL(self, tensor, pil_image, msg=None): + np_pil_image = np.array(pil_image) + if np_pil_image.ndim == 2: + np_pil_image = np_pil_image[:, :, None] + pil_tensor = torch.as_tensor(np_pil_image.transpose((2, 0, 1))) + if msg is None: + msg = "tensor:\n{} \ndid not equal PIL tensor:\n{}".format(tensor, pil_tensor) + self.assertTrue(tensor.cpu().equal(pil_tensor), msg) + + def approxEqualTensorToPIL(self, tensor, pil_image, tol=1e-5, msg=None, agg_method="mean"): + np_pil_image = np.array(pil_image) + if np_pil_image.ndim == 2: + np_pil_image = np_pil_image[:, :, None] + pil_tensor = torch.as_tensor(np_pil_image.transpose((2, 0, 1))).to(tensor) + # error value can be mean absolute error, max abs error + err = getattr(torch, agg_method)(torch.abs(tensor - pil_tensor)).item() + self.assertTrue( + err < tol, + msg="{}: err={}, tol={}: \n{}\nvs\n{}".format(msg, err, tol, tensor[0, :10, :10], pil_tensor[0, :10, :10]) + ) + + +def cycle_over(objs): + for idx, obj in enumerate(objs): + yield obj, objs[:idx] + objs[idx + 1:] + + +def int_dtypes(): + return torch.testing.integral_types() + + +def float_dtypes(): + return torch.testing.floating_types() + + +@contextlib.contextmanager +def disable_console_output(): + with contextlib.ExitStack() as stack, open(os.devnull, "w") as devnull: + stack.enter_context(contextlib.redirect_stdout(devnull)) + stack.enter_context(contextlib.redirect_stderr(devnull)) + yield + + +def call_args_to_kwargs_only(call_args, *callable_or_arg_names): + callable_or_arg_name = callable_or_arg_names[0] + if callable(callable_or_arg_name): + argspec = inspect.getfullargspec(callable_or_arg_name) + arg_names = argspec.args + if isinstance(callable_or_arg_name, type): + # remove self + arg_names.pop(0) + else: + arg_names = callable_or_arg_names + + args, kwargs = call_args + kwargs_only = kwargs.copy() + kwargs_only.update(dict(zip(arg_names, args))) + return kwargs_only diff --git a/test/cpp/test_custom_operators.cpp b/test/cpp/test_custom_operators.cpp new file mode 100644 index 00000000000..d660852bbfb --- /dev/null +++ b/test/cpp/test_custom_operators.cpp @@ -0,0 +1,60 @@ +// Copyright 2004-present Facebook. All Rights Reserved. + +#include +#include +#include + +// FIXME: the include path differs from OSS due to the extra csrc +#include + +TEST(test_custom_operators, nms) { + // make sure that the torchvision ops are visible to the jit interpreter + auto& ops = torch::jit::getAllOperatorsFor(torch::jit::Symbol::fromQualString("torchvision::nms")); + ASSERT_EQ(ops.size(), 1); + + auto& op = ops.front(); + ASSERT_EQ(op->schema().name(), "torchvision::nms"); + + torch::jit::Stack stack; + at::Tensor boxes = at::rand({50, 4}), scores = at::rand({50}); + double thresh = 0.7; + + torch::jit::push(stack, boxes, scores, thresh); + op->getOperation()(&stack); + at::Tensor output_jit; + torch::jit::pop(stack, output_jit); + + at::Tensor output = vision::ops::nms(boxes, scores, thresh); + ASSERT_TRUE(output_jit.allclose(output)); + +} + +TEST(test_custom_operators, roi_align_visible) { + // make sure that the torchvision ops are visible to the jit interpreter even if + // not explicitly included + auto& ops = torch::jit::getAllOperatorsFor(torch::jit::Symbol::fromQualString("torchvision::roi_align")); + ASSERT_EQ(ops.size(), 1); + + auto& op = ops.front(); + ASSERT_EQ(op->schema().name(), "torchvision::roi_align"); + + torch::jit::Stack stack; + float roi_data[] = { + 0., 0., 0., 5., 5., + 0., 5., 5., 10., 10. + }; + at::Tensor input = at::rand({1, 2, 10, 10}), rois = at::from_blob(roi_data, {2, 5}); + double spatial_scale = 1.0; + int64_t pooled_height = 3, pooled_width = 3, sampling_ratio = -1; + bool aligned = true; + + torch::jit::push(stack, input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio, aligned); + op->getOperation()(&stack); + at::Tensor output_jit; + torch::jit::pop(stack, output_jit); + + ASSERT_EQ(output_jit.sizes()[0], 2); + ASSERT_EQ(output_jit.sizes()[1], 2); + ASSERT_EQ(output_jit.sizes()[2], 3); + ASSERT_EQ(output_jit.sizes()[3], 3); +} diff --git a/test/datasets_utils.py b/test/datasets_utils.py new file mode 100644 index 00000000000..60e3990f3a2 --- /dev/null +++ b/test/datasets_utils.py @@ -0,0 +1,850 @@ +import collections.abc +import contextlib +import functools +import importlib +import inspect +import itertools +import os +import pathlib +import random +import string +import unittest +import unittest.mock +from collections import defaultdict +from typing import Any, Callable, Dict, Iterator, List, Optional, Sequence, Tuple, Union + +import PIL +import PIL.Image + +import torch +import torchvision.datasets +import torchvision.io + +from common_utils import get_tmp_dir, disable_console_output + + +__all__ = [ + "UsageError", + "lazy_importer", + "test_all_configs", + "DatasetTestCase", + "ImageDatasetTestCase", + "VideoDatasetTestCase", + "create_image_or_video_tensor", + "create_image_file", + "create_image_folder", + "create_video_file", + "create_video_folder", + "create_random_string", +] + + +class UsageError(Exception): + """Should be raised in case an error happens in the setup rather than the test.""" + + +class LazyImporter: + r"""Lazy importer for additional dependicies. + + Some datasets require additional packages that are no direct dependencies of torchvision. Instances of this class + provide modules listed in MODULES as attributes. They are only imported when accessed. + + """ + MODULES = ( + "av", + "lmdb", + "pycocotools", + "requests", + "scipy.io", + "scipy.sparse", + ) + + def __init__(self): + modules = defaultdict(list) + for module in self.MODULES: + module, *submodules = module.split(".", 1) + if submodules: + modules[module].append(submodules[0]) + else: + # This introduces the module so that it is known when we later iterate over the dictionary. + modules.__missing__(module) + + for module, submodules in modules.items(): + # We need the quirky 'module=module' and submodules=submodules arguments to the lambda since otherwise the + # lookup for these would happen at runtime rather than at definition. Thus, without it, every property + # would try to import the last item in 'modules' + setattr( + type(self), + module, + property(lambda self, module=module, submodules=submodules: LazyImporter._import(module, submodules)), + ) + + @staticmethod + def _import(package, subpackages): + try: + module = importlib.import_module(package) + except ImportError as error: + raise UsageError( + f"Failed to import module '{package}'. " + f"This probably means that the current test case needs '{package}' installed, " + f"but it is not a dependency of torchvision. " + f"You need to install it manually, for example 'pip install {package}'." + ) from error + + for name in subpackages: + importlib.import_module(f".{name}", package=package) + + return module + + +lazy_importer = LazyImporter() + + +def requires_lazy_imports(*modules): + def outer_wrapper(fn): + @functools.wraps(fn) + def inner_wrapper(*args, **kwargs): + for module in modules: + getattr(lazy_importer, module.replace(".", "_")) + return fn(*args, **kwargs) + + return inner_wrapper + + return outer_wrapper + + +def test_all_configs(test): + """Decorator to run test against all configurations. + + Add this as decorator to an arbitrary test to run it against all configurations. This includes + :attr:`DatasetTestCase.DEFAULT_CONFIG` and :attr:`DatasetTestCase.ADDITIONAL_CONFIGS`. + + The current configuration is provided as the first parameter for the test: + + .. code-block:: + + @test_all_configs() + def test_foo(self, config): + pass + + .. note:: + + This will try to remove duplicate configurations. During this process it will not not preserve a potential + ordering of the configurations or an inner ordering of a configuration. + """ + + def maybe_remove_duplicates(configs): + try: + return [dict(config_) for config_ in set(tuple(sorted(config.items())) for config in configs)] + except TypeError: + # A TypeError will be raised if a value of any config is not hashable, e.g. a list. In that case duplicate + # removal would be a lot more elaborate and we simply bail out. + return configs + + @functools.wraps(test) + def wrapper(self): + configs = [] + if self.DEFAULT_CONFIG is not None: + configs.append(self.DEFAULT_CONFIG) + if self.ADDITIONAL_CONFIGS is not None: + configs.extend(self.ADDITIONAL_CONFIGS) + + if not configs: + configs = [self._KWARG_DEFAULTS.copy()] + else: + configs = maybe_remove_duplicates(configs) + + for config in configs: + with self.subTest(**config): + test(self, config) + + return wrapper + + +def combinations_grid(**kwargs): + """Creates a grid of input combinations. + + Each element in the returned sequence is a dictionary containing one possible combination as values. + + Example: + >>> combinations_grid(foo=("bar", "baz"), spam=("eggs", "ham")) + [ + {'foo': 'bar', 'spam': 'eggs'}, + {'foo': 'bar', 'spam': 'ham'}, + {'foo': 'baz', 'spam': 'eggs'}, + {'foo': 'baz', 'spam': 'ham'} + ] + """ + return [dict(zip(kwargs.keys(), values)) for values in itertools.product(*kwargs.values())] + + +class DatasetTestCase(unittest.TestCase): + """Abstract base class for all dataset testcases. + + You have to overwrite the following class attributes: + + - DATASET_CLASS (torchvision.datasets.VisionDataset): Class of dataset to be tested. + - FEATURE_TYPES (Sequence[Any]): Types of the elements returned by index access of the dataset. Instead of + providing these manually, you can instead subclass ``ImageDatasetTestCase`` or ``VideoDatasetTestCase```to + get a reasonable default, that should work for most cases. Each entry of the sequence may be a tuple, + to indicate multiple possible values. + + Optionally, you can overwrite the following class attributes: + + - DEFAULT_CONFIG (Dict[str, Any]): Config that will be used by default. If omitted, this defaults to all + keyword arguments of the dataset minus ``transform``, ``target_transform``, ``transforms``, and + ``download``. Overwrite this if you want to use a default value for a parameter for which the dataset does + not provide one. + - ADDITIONAL_CONFIGS (Sequence[Dict[str, Any]]): Additional configs that should be tested. Each dictionary can + contain an arbitrary combination of dataset parameters that are **not** ``transform``, ``target_transform``, + ``transforms``, or ``download``. + - REQUIRED_PACKAGES (Iterable[str]): Additional dependencies to use the dataset. If these packages are not + available, the tests are skipped. + + Additionally, you need to overwrite the ``inject_fake_data()`` method that provides the data that the tests rely on. + The fake data should resemble the original data as close as necessary, while containing only few examples. During + the creation of the dataset check-, download-, and extract-functions from ``torchvision.datasets.utils`` are + disabled. + + Without further configuration, the testcase will test if + + 1. the dataset raises a :class:`FileNotFoundError` or a :class:`RuntimeError` if the data files are not found or + corrupted, + 2. the dataset inherits from `torchvision.datasets.VisionDataset`, + 3. the dataset can be turned into a string, + 4. the feature types of a returned example matches ``FEATURE_TYPES``, + 5. the number of examples matches the injected fake data, and + 6. the dataset calls ``transform``, ``target_transform``, or ``transforms`` if available when accessing data. + + Case 3. to 6. are tested against all configurations in ``CONFIGS``. + + To add dataset-specific tests, create a new method that takes no arguments with ``test_`` as a name prefix: + + .. code-block:: + + def test_foo(self): + pass + + If you want to run the test against all configs, add the ``@test_all_configs`` decorator to the definition and + accept a single argument: + + .. code-block:: + + @test_all_configs + def test_bar(self, config): + pass + + Within the test you can use the ``create_dataset()`` method that yields the dataset as well as additional + information provided by the ``ìnject_fake_data()`` method: + + .. code-block:: + + def test_baz(self): + with self.create_dataset() as (dataset, info): + pass + """ + + DATASET_CLASS = None + FEATURE_TYPES = None + + DEFAULT_CONFIG = None + ADDITIONAL_CONFIGS = None + REQUIRED_PACKAGES = None + + # These keyword arguments are checked by test_transforms in case they are available in DATASET_CLASS. + _TRANSFORM_KWARGS = { + "transform", + "target_transform", + "transforms", + } + # These keyword arguments get a 'special' treatment and should not be set in DEFAULT_CONFIG or ADDITIONAL_CONFIGS. + _SPECIAL_KWARGS = { + *_TRANSFORM_KWARGS, + "download", + } + + # These fields are populated during setupClass() within _populate_private_class_attributes() + + # This will be a dictionary containing all keyword arguments with their respective default values extracted from + # the dataset constructor. + _KWARG_DEFAULTS = None + # This will be a set of all _SPECIAL_KWARGS that the dataset constructor takes. + _HAS_SPECIAL_KWARG = None + + # These functions are disabled during dataset creation in create_dataset(). + _CHECK_FUNCTIONS = { + "check_md5", + "check_integrity", + } + _DOWNLOAD_EXTRACT_FUNCTIONS = { + "download_url", + "download_file_from_google_drive", + "extract_archive", + "download_and_extract_archive", + } + + def dataset_args(self, tmpdir: str, config: Dict[str, Any]) -> Sequence[Any]: + """Define positional arguments passed to the dataset. + + .. note:: + + The default behavior is only valid if the dataset to be tested has ``root`` as the only required parameter. + Otherwise you need to overwrite this method. + + Args: + tmpdir (str): Path to a temporary directory. For most cases this acts as root directory for the dataset + to be created and in turn also for the fake data injected here. + config (Dict[str, Any]): Configuration that will be passed to the dataset constructor. It provides at least + fields for all dataset parameters with default values. + + Returns: + (Tuple[str]): ``tmpdir`` which corresponds to ``root`` for most datasets. + """ + return (tmpdir,) + + def inject_fake_data(self, tmpdir: str, config: Dict[str, Any]) -> Union[int, Dict[str, Any]]: + """Inject fake data for dataset into a temporary directory. + + During the creation of the dataset the download and extract logic is disabled. Thus, the fake data injected + here needs to resemble the raw data, i.e. the state of the dataset directly after the files are downloaded and + potentially extracted. + + Args: + tmpdir (str): Path to a temporary directory. For most cases this acts as root directory for the dataset + to be created and in turn also for the fake data injected here. + config (Dict[str, Any]): Configuration that will be passed to the dataset constructor. It provides at least + fields for all dataset parameters with default values. + + Needs to return one of the following: + + 1. (int): Number of examples in the dataset to be created, or + 2. (Dict[str, Any]): Additional information about the injected fake data. Must contain the field + ``"num_examples"`` that corresponds to the number of examples in the dataset to be created. + """ + raise NotImplementedError("You need to provide fake data in order for the tests to run.") + + @contextlib.contextmanager + def create_dataset( + self, + config: Optional[Dict[str, Any]] = None, + inject_fake_data: bool = True, + patch_checks: Optional[bool] = None, + **kwargs: Any, + ) -> Iterator[Tuple[torchvision.datasets.VisionDataset, Dict[str, Any]]]: + r"""Create the dataset in a temporary directory. + + The configuration passed to the dataset is populated to contain at least all parameters with default values. + For this the following order of precedence is used: + + 1. Parameters in :attr:`kwargs`. + 2. Configuration in :attr:`config`. + 3. Configuration in :attr:`~DatasetTestCase.DEFAULT_CONFIG`. + 4. Default parameters of the dataset. + + Args: + config (Optional[Dict[str, Any]]): Configuration that will be used to create the dataset. + inject_fake_data (bool): If ``True`` (default) inject the fake data with :meth:`.inject_fake_data` before + creating the dataset. + patch_checks (Optional[bool]): If ``True`` disable integrity check logic while creating the dataset. If + omitted defaults to the same value as ``inject_fake_data``. + **kwargs (Any): Additional parameters passed to the dataset. These parameters take precedence in case they + overlap with ``config``. + + Yields: + dataset (torchvision.dataset.VisionDataset): Dataset. + info (Dict[str, Any]): Additional information about the injected fake data. See :meth:`.inject_fake_data` + for details. + """ + if patch_checks is None: + patch_checks = inject_fake_data + + special_kwargs, other_kwargs = self._split_kwargs(kwargs) + + complete_config = self._KWARG_DEFAULTS.copy() + if self.DEFAULT_CONFIG: + complete_config.update(self.DEFAULT_CONFIG) + if config: + complete_config.update(config) + if other_kwargs: + complete_config.update(other_kwargs) + + if "download" in self._HAS_SPECIAL_KWARG and special_kwargs.get("download", False): + # override download param to False param if its default is truthy + special_kwargs["download"] = False + + patchers = self._patch_download_extract() + if patch_checks: + patchers.update(self._patch_checks()) + + with get_tmp_dir() as tmpdir: + args = self.dataset_args(tmpdir, complete_config) + info = self._inject_fake_data(tmpdir, complete_config) if inject_fake_data else None + + with self._maybe_apply_patches(patchers), disable_console_output(): + dataset = self.DATASET_CLASS(*args, **complete_config, **special_kwargs) + + yield dataset, info + + @classmethod + def setUpClass(cls): + cls._verify_required_public_class_attributes() + cls._populate_private_class_attributes() + cls._process_optional_public_class_attributes() + super().setUpClass() + + @classmethod + def _verify_required_public_class_attributes(cls): + if cls.DATASET_CLASS is None: + raise UsageError( + "The class attribute 'DATASET_CLASS' needs to be overwritten. " + "It should contain the class of the dataset to be tested." + ) + if cls.FEATURE_TYPES is None: + raise UsageError( + "The class attribute 'FEATURE_TYPES' needs to be overwritten. " + "It should contain a sequence of types that the dataset returns when accessed by index." + ) + + @classmethod + def _populate_private_class_attributes(cls): + defaults = [] + for cls_ in cls.DATASET_CLASS.__mro__: + if cls_ is torchvision.datasets.VisionDataset: + break + + argspec = inspect.getfullargspec(cls_.__init__) + + if not argspec.defaults: + continue + + defaults.append( + {kwarg: default for kwarg, default in zip(argspec.args[-len(argspec.defaults):], argspec.defaults)} + ) + + if not argspec.varkw: + break + + kwarg_defaults = dict() + for config in reversed(defaults): + kwarg_defaults.update(config) + + has_special_kwargs = set() + for name in cls._SPECIAL_KWARGS: + if name not in kwarg_defaults: + continue + + del kwarg_defaults[name] + has_special_kwargs.add(name) + + cls._KWARG_DEFAULTS = kwarg_defaults + cls._HAS_SPECIAL_KWARG = has_special_kwargs + + @classmethod + def _process_optional_public_class_attributes(cls): + def check_config(config, name): + special_kwargs = tuple(f"'{name}'" for name in cls._SPECIAL_KWARGS if name in config) + if special_kwargs: + raise UsageError( + f"{name} contains a value for the parameter(s) {', '.join(special_kwargs)}. " + f"These are handled separately by the test case and should not be set here. " + f"If you need to test some custom behavior regarding these parameters, " + f"you need to write a custom test (*not* test case), e.g. test_custom_transform()." + ) + + if cls.DEFAULT_CONFIG is not None: + check_config(cls.DEFAULT_CONFIG, "DEFAULT_CONFIG") + + if cls.ADDITIONAL_CONFIGS is not None: + for idx, config in enumerate(cls.ADDITIONAL_CONFIGS): + check_config(config, f"CONFIGS[{idx}]") + + if cls.REQUIRED_PACKAGES: + missing_pkgs = [] + for pkg in cls.REQUIRED_PACKAGES: + try: + importlib.import_module(pkg) + except ImportError: + missing_pkgs.append(f"'{pkg}'") + + if missing_pkgs: + raise unittest.SkipTest( + f"The package(s) {', '.join(missing_pkgs)} are required to load the dataset " + f"'{cls.DATASET_CLASS.__name__}', but are not installed." + ) + + def _split_kwargs(self, kwargs): + special_kwargs = kwargs.copy() + other_kwargs = {key: special_kwargs.pop(key) for key in set(special_kwargs.keys()) - self._SPECIAL_KWARGS} + return special_kwargs, other_kwargs + + def _inject_fake_data(self, tmpdir, config): + info = self.inject_fake_data(tmpdir, config) + if info is None: + raise UsageError( + "The method 'inject_fake_data' needs to return at least an integer indicating the number of " + "examples for the current configuration." + ) + elif isinstance(info, int): + info = dict(num_examples=info) + elif not isinstance(info, dict): + raise UsageError( + f"The additional information returned by the method 'inject_fake_data' must be either an " + f"integer indicating the number of examples for the current configuration or a dictionary with " + f"the same content. Got {type(info)} instead." + ) + elif "num_examples" not in info: + raise UsageError( + "The information dictionary returned by the method 'inject_fake_data' must contain a " + "'num_examples' field that holds the number of examples for the current configuration." + ) + return info + + def _patch_download_extract(self): + module = inspect.getmodule(self.DATASET_CLASS).__name__ + return {unittest.mock.patch(f"{module}.{function}") for function in self._DOWNLOAD_EXTRACT_FUNCTIONS} + + def _patch_checks(self): + module = inspect.getmodule(self.DATASET_CLASS).__name__ + return {unittest.mock.patch(f"{module}.{function}", return_value=True) for function in self._CHECK_FUNCTIONS} + + @contextlib.contextmanager + def _maybe_apply_patches(self, patchers): + with contextlib.ExitStack() as stack: + mocks = {} + for patcher in patchers: + with contextlib.suppress(AttributeError): + mocks[patcher.target] = stack.enter_context(patcher) + yield mocks + + def test_not_found_or_corrupted(self): + with self.assertRaises((FileNotFoundError, RuntimeError)): + with self.create_dataset(inject_fake_data=False): + pass + + def test_smoke(self): + with self.create_dataset() as (dataset, _): + self.assertIsInstance(dataset, torchvision.datasets.VisionDataset) + + @test_all_configs + def test_str_smoke(self, config): + with self.create_dataset(config) as (dataset, _): + self.assertIsInstance(str(dataset), str) + + @test_all_configs + def test_feature_types(self, config): + with self.create_dataset(config) as (dataset, _): + example = dataset[0] + + if len(self.FEATURE_TYPES) > 1: + actual = len(example) + expected = len(self.FEATURE_TYPES) + self.assertEqual( + actual, + expected, + f"The number of the returned features does not match the the number of elements in FEATURE_TYPES: " + f"{actual} != {expected}", + ) + else: + example = (example,) + + for idx, (feature, expected_feature_type) in enumerate(zip(example, self.FEATURE_TYPES)): + with self.subTest(idx=idx): + self.assertIsInstance(feature, expected_feature_type) + + @test_all_configs + def test_num_examples(self, config): + with self.create_dataset(config) as (dataset, info): + self.assertEqual(len(dataset), info["num_examples"]) + + @test_all_configs + def test_transforms(self, config): + mock = unittest.mock.Mock(wraps=lambda *args: args[0] if len(args) == 1 else args) + for kwarg in self._TRANSFORM_KWARGS: + if kwarg not in self._HAS_SPECIAL_KWARG: + continue + + mock.reset_mock() + + with self.subTest(kwarg=kwarg): + with self.create_dataset(config, **{kwarg: mock}) as (dataset, _): + dataset[0] + + mock.assert_called() + + +class ImageDatasetTestCase(DatasetTestCase): + """Abstract base class for image dataset testcases. + + - Overwrites the FEATURE_TYPES class attribute to expect a :class:`PIL.Image.Image` and an integer label. + """ + + FEATURE_TYPES = (PIL.Image.Image, int) + + @contextlib.contextmanager + def create_dataset( + self, + config: Optional[Dict[str, Any]] = None, + inject_fake_data: bool = True, + patch_checks: Optional[bool] = None, + **kwargs: Any, + ) -> Iterator[Tuple[torchvision.datasets.VisionDataset, Dict[str, Any]]]: + with super().create_dataset( + config=config, + inject_fake_data=inject_fake_data, + patch_checks=patch_checks, + **kwargs, + ) as (dataset, info): + # PIL.Image.open() only loads the image meta data upfront and keeps the file open until the first access + # to the pixel data occurs. Trying to delete such a file results in an PermissionError on Windows. Thus, we + # force-load opened images. + # This problem only occurs during testing since some tests, e.g. DatasetTestCase.test_feature_types open an + # image, but never use the underlying data. During normal operation it is reasonable to assume that the + # user wants to work with the image he just opened rather than deleting the underlying file. + with self._force_load_images(): + yield dataset, info + + @contextlib.contextmanager + def _force_load_images(self): + open = PIL.Image.open + + def new(fp, *args, **kwargs): + image = open(fp, *args, **kwargs) + if isinstance(fp, (str, pathlib.Path)): + image.load() + return image + + with unittest.mock.patch("PIL.Image.open", new=new): + yield + + +class VideoDatasetTestCase(DatasetTestCase): + """Abstract base class for video dataset testcases. + + - Overwrites the 'FEATURE_TYPES' class attribute to expect two :class:`torch.Tensor` s for the video and audio as + well as an integer label. + - Overwrites the 'REQUIRED_PACKAGES' class attribute to require PyAV (``av``). + - Adds the 'DEFAULT_FRAMES_PER_CLIP' class attribute. If no 'frames_per_clip' is provided by 'inject_fake_data()' + and it is the last parameter without a default value in the dataset constructor, the value of the + 'DEFAULT_FRAMES_PER_CLIP' class attribute is appended to the output. + """ + + FEATURE_TYPES = (torch.Tensor, torch.Tensor, int) + REQUIRED_PACKAGES = ("av",) + + DEFAULT_FRAMES_PER_CLIP = 1 + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.dataset_args = self._set_default_frames_per_clip(self.dataset_args) + + def _set_default_frames_per_clip(self, inject_fake_data): + argspec = inspect.getfullargspec(self.DATASET_CLASS.__init__) + args_without_default = argspec.args[1:-len(argspec.defaults)] + frames_per_clip_last = args_without_default[-1] == "frames_per_clip" + + @functools.wraps(inject_fake_data) + def wrapper(tmpdir, config): + args = inject_fake_data(tmpdir, config) + if frames_per_clip_last and len(args) == len(args_without_default) - 1: + args = (*args, self.DEFAULT_FRAMES_PER_CLIP) + + return args + + return wrapper + + +def create_image_or_video_tensor(size: Sequence[int]) -> torch.Tensor: + r"""Create a random uint8 tensor. + + Args: + size (Sequence[int]): Size of the tensor. + """ + return torch.randint(0, 256, size, dtype=torch.uint8) + + +def create_image_file( + root: Union[pathlib.Path, str], name: Union[pathlib.Path, str], size: Union[Sequence[int], int] = 10, **kwargs: Any +) -> pathlib.Path: + """Create an image file from random data. + + Args: + root (Union[str, pathlib.Path]): Root directory the image file will be placed in. + name (Union[str, pathlib.Path]): Name of the image file. + size (Union[Sequence[int], int]): Size of the image that represents the ``(num_channels, height, width)``. If + scalar, the value is used for the height and width. If not provided, three channels are assumed. + kwargs (Any): Additional parameters passed to :meth:`PIL.Image.Image.save`. + + Returns: + pathlib.Path: Path to the created image file. + """ + if isinstance(size, int): + size = (size, size) + if len(size) == 2: + size = (3, *size) + if len(size) != 3: + raise UsageError( + f"The 'size' argument should either be an int or a sequence of length 2 or 3. Got {len(size)} instead" + ) + + image = create_image_or_video_tensor(size) + file = pathlib.Path(root) / name + + # torch (num_channels x height x width) -> PIL (width x height x num_channels) + image = image.permute(2, 1, 0) + # For grayscale images PIL doesn't use a channel dimension + if image.shape[2] == 1: + image = torch.squeeze(image, 2) + PIL.Image.fromarray(image.numpy()).save(file, **kwargs) + return file + + +def create_image_folder( + root: Union[pathlib.Path, str], + name: Union[pathlib.Path, str], + file_name_fn: Callable[[int], str], + num_examples: int, + size: Optional[Union[Sequence[int], int, Callable[[int], Union[Sequence[int], int]]]] = None, + **kwargs: Any, +) -> List[pathlib.Path]: + """Create a folder of random images. + + Args: + root (Union[str, pathlib.Path]): Root directory the image folder will be placed in. + name (Union[str, pathlib.Path]): Name of the image folder. + file_name_fn (Callable[[int], str]): Should return a file name if called with the file index. + num_examples (int): Number of images to create. + size (Optional[Union[Sequence[int], int, Callable[[int], Union[Sequence[int], int]]]]): Size of the images. If + callable, will be called with the index of the corresponding file. If omitted, a random height and width + between 3 and 10 pixels is selected on a per-image basis. + kwargs (Any): Additional parameters passed to :func:`create_image_file`. + + Returns: + List[pathlib.Path]: Paths to all created image files. + + .. seealso:: + + - :func:`create_image_file` + """ + if size is None: + + def size(idx: int) -> Tuple[int, int, int]: + num_channels = 3 + height, width = torch.randint(3, 11, size=(2,), dtype=torch.int).tolist() + return (num_channels, height, width) + + root = pathlib.Path(root) / name + os.makedirs(root, exist_ok=True) + + return [ + create_image_file(root, file_name_fn(idx), size=size(idx) if callable(size) else size, **kwargs) + for idx in range(num_examples) + ] + + +@requires_lazy_imports("av") +def create_video_file( + root: Union[pathlib.Path, str], + name: Union[pathlib.Path, str], + size: Union[Sequence[int], int] = (1, 3, 10, 10), + fps: float = 25, + **kwargs: Any, +) -> pathlib.Path: + """Create an video file from random data. + + Args: + root (Union[str, pathlib.Path]): Root directory the video file will be placed in. + name (Union[str, pathlib.Path]): Name of the video file. + size (Union[Sequence[int], int]): Size of the video that represents the + ``(num_frames, num_channels, height, width)``. If scalar, the value is used for the height and width. + If not provided, ``num_frames=1`` and ``num_channels=3`` are assumed. + fps (float): Frame rate in frames per second. + kwargs (Any): Additional parameters passed to :func:`torchvision.io.write_video`. + + Returns: + pathlib.Path: Path to the created image file. + + Raises: + UsageError: If PyAV is not available. + """ + if isinstance(size, int): + size = (size, size) + if len(size) == 2: + size = (3, *size) + if len(size) == 3: + size = (1, *size) + if len(size) != 4: + raise UsageError( + f"The 'size' argument should either be an int or a sequence of length 2, 3, or 4. Got {len(size)} instead" + ) + + video = create_image_or_video_tensor(size) + file = pathlib.Path(root) / name + torchvision.io.write_video(str(file), video.permute(0, 2, 3, 1), fps, **kwargs) + return file + + +@requires_lazy_imports("av") +def create_video_folder( + root: Union[str, pathlib.Path], + name: Union[str, pathlib.Path], + file_name_fn: Callable[[int], str], + num_examples: int, + size: Optional[Union[Sequence[int], int, Callable[[int], Union[Sequence[int], int]]]] = None, + fps=25, + **kwargs, +) -> List[pathlib.Path]: + """Create a folder of random videos. + + Args: + root (Union[str, pathlib.Path]): Root directory the video folder will be placed in. + name (Union[str, pathlib.Path]): Name of the video folder. + file_name_fn (Callable[[int], str]): Should return a file name if called with the file index. + num_examples (int): Number of videos to create. + size (Optional[Union[Sequence[int], int, Callable[[int], Union[Sequence[int], int]]]]): Size of the videos. If + callable, will be called with the index of the corresponding file. If omitted, a random even height and + width between 4 and 10 pixels is selected on a per-video basis. + fps (float): Frame rate in frames per second. + kwargs (Any): Additional parameters passed to :func:`create_video_file`. + + Returns: + List[pathlib.Path]: Paths to all created video files. + + Raises: + UsageError: If PyAV is not available. + + .. seealso:: + + - :func:`create_video_file` + """ + if size is None: + + def size(idx): + num_frames = 1 + num_channels = 3 + # The 'libx264' video codec, which is the default of torchvision.io.write_video, requires the height and + # width of the video to be divisible by 2. + height, width = (torch.randint(2, 6, size=(2,), dtype=torch.int) * 2).tolist() + return (num_frames, num_channels, height, width) + + root = pathlib.Path(root) / name + os.makedirs(root, exist_ok=True) + + return [ + create_video_file(root, file_name_fn(idx), size=size(idx) if callable(size) else size, **kwargs) + for idx in range(num_examples) + ] + + +def create_random_string(length: int, *digits: str) -> str: + """Create a random string. + + Args: + length (int): Number of characters in the generated string. + *characters (str): Characters to sample from. If omitted defaults to :attr:`string.ascii_lowercase`. + """ + if not digits: + digits = string.ascii_lowercase + else: + digits = "".join(itertools.chain(*digits)) + + return "".join(random.choice(digits) for _ in range(length)) diff --git a/test/expect/ModelTester.test_deeplabv3_mobilenet_v3_large_expect.pkl b/test/expect/ModelTester.test_deeplabv3_mobilenet_v3_large_expect.pkl new file mode 100644 index 00000000000..58d6da6c721 Binary files /dev/null and b/test/expect/ModelTester.test_deeplabv3_mobilenet_v3_large_expect.pkl differ diff --git a/test/expect/ModelTester.test_deeplabv3_resnet101_expect.pkl b/test/expect/ModelTester.test_deeplabv3_resnet101_expect.pkl new file mode 100644 index 00000000000..ac9c251e4e9 Binary files /dev/null and b/test/expect/ModelTester.test_deeplabv3_resnet101_expect.pkl differ diff --git a/test/expect/ModelTester.test_deeplabv3_resnet50_expect.pkl b/test/expect/ModelTester.test_deeplabv3_resnet50_expect.pkl new file mode 100644 index 00000000000..70e70a92e2c Binary files /dev/null and b/test/expect/ModelTester.test_deeplabv3_resnet50_expect.pkl differ diff --git a/test/expect/ModelTester.test_fasterrcnn_mobilenet_v3_large_320_fpn_expect.pkl b/test/expect/ModelTester.test_fasterrcnn_mobilenet_v3_large_320_fpn_expect.pkl new file mode 100644 index 00000000000..94c6261b7fc Binary files /dev/null and b/test/expect/ModelTester.test_fasterrcnn_mobilenet_v3_large_320_fpn_expect.pkl differ diff --git a/test/expect/ModelTester.test_fasterrcnn_mobilenet_v3_large_fpn_expect.pkl b/test/expect/ModelTester.test_fasterrcnn_mobilenet_v3_large_fpn_expect.pkl new file mode 100644 index 00000000000..f3882de4838 Binary files /dev/null and b/test/expect/ModelTester.test_fasterrcnn_mobilenet_v3_large_fpn_expect.pkl differ diff --git a/test/expect/ModelTester.test_fasterrcnn_resnet50_fpn_expect.pkl b/test/expect/ModelTester.test_fasterrcnn_resnet50_fpn_expect.pkl index 3e4fc8ec641..3085bf36e60 100644 Binary files a/test/expect/ModelTester.test_fasterrcnn_resnet50_fpn_expect.pkl and b/test/expect/ModelTester.test_fasterrcnn_resnet50_fpn_expect.pkl differ diff --git a/test/expect/ModelTester.test_fcn_resnet101_expect.pkl b/test/expect/ModelTester.test_fcn_resnet101_expect.pkl new file mode 100644 index 00000000000..ae6f21bb95c Binary files /dev/null and b/test/expect/ModelTester.test_fcn_resnet101_expect.pkl differ diff --git a/test/expect/ModelTester.test_fcn_resnet50_expect.pkl b/test/expect/ModelTester.test_fcn_resnet50_expect.pkl new file mode 100644 index 00000000000..29a2382ed89 Binary files /dev/null and b/test/expect/ModelTester.test_fcn_resnet50_expect.pkl differ diff --git a/test/expect/ModelTester.test_keypointrcnn_resnet50_fpn_expect.pkl b/test/expect/ModelTester.test_keypointrcnn_resnet50_fpn_expect.pkl index a12333d6fb1..0f0d2c92f86 100644 Binary files a/test/expect/ModelTester.test_keypointrcnn_resnet50_fpn_expect.pkl and b/test/expect/ModelTester.test_keypointrcnn_resnet50_fpn_expect.pkl differ diff --git a/test/expect/ModelTester.test_lraspp_mobilenet_v3_large_expect.pkl b/test/expect/ModelTester.test_lraspp_mobilenet_v3_large_expect.pkl new file mode 100644 index 00000000000..b2aa2ca89a9 Binary files /dev/null and b/test/expect/ModelTester.test_lraspp_mobilenet_v3_large_expect.pkl differ diff --git a/test/expect/ModelTester.test_maskrcnn_resnet50_fpn_expect.pkl b/test/expect/ModelTester.test_maskrcnn_resnet50_fpn_expect.pkl index c05342100a3..d8ee673ab60 100644 Binary files a/test/expect/ModelTester.test_maskrcnn_resnet50_fpn_expect.pkl and b/test/expect/ModelTester.test_maskrcnn_resnet50_fpn_expect.pkl differ diff --git a/test/expect/ModelTester.test_mobilenet_v3_large_expect.pkl b/test/expect/ModelTester.test_mobilenet_v3_large_expect.pkl new file mode 100644 index 00000000000..9691daf18c7 Binary files /dev/null and b/test/expect/ModelTester.test_mobilenet_v3_large_expect.pkl differ diff --git a/test/expect/ModelTester.test_mobilenet_v3_small_expect.pkl b/test/expect/ModelTester.test_mobilenet_v3_small_expect.pkl new file mode 100644 index 00000000000..5373739b53c Binary files /dev/null and b/test/expect/ModelTester.test_mobilenet_v3_small_expect.pkl differ diff --git a/test/expect/ModelTester.test_retinanet_resnet50_fpn_expect.pkl b/test/expect/ModelTester.test_retinanet_resnet50_fpn_expect.pkl new file mode 100644 index 00000000000..8bd32f5be88 Binary files /dev/null and b/test/expect/ModelTester.test_retinanet_resnet50_fpn_expect.pkl differ diff --git a/test/fakedata_generation.py b/test/fakedata_generation.py index d14bc0c8304..314222dc43f 100644 --- a/test/fakedata_generation.py +++ b/test/fakedata_generation.py @@ -1,5 +1,4 @@ import os -import sys import contextlib import tarfile import json @@ -7,12 +6,55 @@ import PIL import torch from common_utils import get_tmp_dir +import pickle +import random +from itertools import cycle +from torchvision.io.video import write_video +import unittest.mock +import hashlib +from distutils import dir_util +import re -PYTHON2 = sys.version_info[0] == 2 -if PYTHON2: - import cPickle as pickle -else: - import pickle + +def mock_class_attribute(stack, target, new): + mock = unittest.mock.patch(target, new_callable=unittest.mock.PropertyMock, return_value=new) + stack.enter_context(mock) + return mock + + +def compute_md5(file): + with open(file, "rb") as fh: + return hashlib.md5(fh.read()).hexdigest() + + +def make_tar(root, name, *files, compression=None): + ext = ".tar" + mode = "w" + if compression is not None: + ext = f"{ext}.{compression}" + mode = f"{mode}:{compression}" + + name = os.path.splitext(name)[0] + ext + archive = os.path.join(root, name) + + with tarfile.open(archive, mode) as fh: + for file in files: + fh.add(os.path.join(root, file), arcname=file) + + return name, compute_md5(archive) + + +def clean_dir(root, *keep): + pattern = re.compile(f"({f')|('.join(keep)})") + for file_or_dir in os.listdir(root): + if pattern.search(file_or_dir): + continue + + file_or_dir = os.path.join(root, file_or_dir) + if os.path.isfile(file_or_dir): + os.remove(file_or_dir) + else: + dir_util.remove_tree(file_or_dir) @contextlib.contextmanager @@ -21,7 +63,7 @@ def _encode(v): return torch.tensor(v, dtype=torch.int32).numpy().tobytes()[::-1] def _make_image_file(filename, num_images): - img = torch.randint(0, 255, size=(28 * 28 * num_images,), dtype=torch.uint8) + img = torch.randint(0, 256, size=(28 * 28 * num_images,), dtype=torch.uint8) with open(filename, "wb") as f: f.write(_encode(2051)) # magic header f.write(_encode(num_images)) @@ -102,147 +144,72 @@ def _make_meta_file(file, classes_key): @contextlib.contextmanager -def imagenet_root(): - import scipy.io as sio - - WNID = 'n01234567' - CLS = 'fakedata' +def widerface_root(): + """ + Generates a dataset with the following folder structure and returns the path root: + + └── widerface + ├── wider_face_split + ├── WIDER_train + ├── WIDER_val + └── WIDER_test + + The dataset consist of + 1 image for each dataset split (train, val, test) and annotation files + for each split + """ def _make_image(file): PIL.Image.fromarray(np.zeros((32, 32, 3), dtype=np.uint8)).save(file) - def _make_tar(archive, content, arcname=None, compress=False): - mode = 'w:gz' if compress else 'w' - if arcname is None: - arcname = os.path.basename(content) - with tarfile.open(archive, mode) as fh: - fh.add(content, arcname=arcname) - def _make_train_archive(root): - with get_tmp_dir() as tmp: - wnid_dir = os.path.join(tmp, WNID) - os.mkdir(wnid_dir) - - _make_image(os.path.join(wnid_dir, WNID + '_1.JPEG')) - - wnid_archive = wnid_dir + '.tar' - _make_tar(wnid_archive, wnid_dir) - - train_archive = os.path.join(root, 'ILSVRC2012_img_train.tar') - _make_tar(train_archive, wnid_archive) + extracted_dir = os.path.join(root, 'WIDER_train', 'images', '0--Parade') + os.makedirs(extracted_dir) + _make_image(os.path.join(extracted_dir, '0_Parade_marchingband_1_1.jpg')) def _make_val_archive(root): - with get_tmp_dir() as tmp: - val_image = os.path.join(tmp, 'ILSVRC2012_val_00000001.JPEG') - _make_image(val_image) - - val_archive = os.path.join(root, 'ILSVRC2012_img_val.tar') - _make_tar(val_archive, val_image) - - def _make_devkit_archive(root): - with get_tmp_dir() as tmp: - data_dir = os.path.join(tmp, 'data') - os.mkdir(data_dir) - - meta_file = os.path.join(data_dir, 'meta.mat') - synsets = np.core.records.fromarrays([ - (0.0, 1.0), - (WNID, ''), - (CLS, ''), - ('fakedata for the torchvision testsuite', ''), - (0.0, 1.0), - ], names=['ILSVRC2012_ID', 'WNID', 'words', 'gloss', 'num_children']) - sio.savemat(meta_file, {'synsets': synsets}) - - groundtruth_file = os.path.join(data_dir, - 'ILSVRC2012_validation_ground_truth.txt') - with open(groundtruth_file, 'w') as fh: - fh.write('0\n') - - devkit_name = 'ILSVRC2012_devkit_t12' - devkit_archive = os.path.join(root, devkit_name + '.tar.gz') - _make_tar(devkit_archive, tmp, arcname=devkit_name, compress=True) + extracted_dir = os.path.join(root, 'WIDER_val', 'images', '0--Parade') + os.makedirs(extracted_dir) + _make_image(os.path.join(extracted_dir, '0_Parade_marchingband_1_2.jpg')) + + def _make_test_archive(root): + extracted_dir = os.path.join(root, 'WIDER_test', 'images', '0--Parade') + os.makedirs(extracted_dir) + _make_image(os.path.join(extracted_dir, '0_Parade_marchingband_1_3.jpg')) + + def _make_annotations_archive(root): + train_bbox_contents = '0--Parade/0_Parade_marchingband_1_1.jpg\n1\n449 330 122 149 0 0 0 0 0 0\n' + val_bbox_contents = '0--Parade/0_Parade_marchingband_1_2.jpg\n1\n501 160 285 443 0 0 0 0 0 0\n' + test_filelist_contents = '0--Parade/0_Parade_marchingband_1_3.jpg\n' + extracted_dir = os.path.join(root, 'wider_face_split') + os.mkdir(extracted_dir) + + # bbox training file + bbox_file = os.path.join(extracted_dir, "wider_face_train_bbx_gt.txt") + with open(bbox_file, "w") as txt_file: + txt_file.write(train_bbox_contents) + + # bbox validation file + bbox_file = os.path.join(extracted_dir, "wider_face_val_bbx_gt.txt") + with open(bbox_file, "w") as txt_file: + txt_file.write(val_bbox_contents) + + # test filelist file + filelist_file = os.path.join(extracted_dir, "wider_face_test_filelist.txt") + with open(filelist_file, "w") as txt_file: + txt_file.write(test_filelist_contents) with get_tmp_dir() as root: - _make_train_archive(root) - _make_val_archive(root) - _make_devkit_archive(root) + root_base = os.path.join(root, "widerface") + os.mkdir(root_base) + _make_train_archive(root_base) + _make_val_archive(root_base) + _make_test_archive(root_base) + _make_annotations_archive(root_base) yield root -@contextlib.contextmanager -def cityscapes_root(): - - def _make_image(file): - PIL.Image.fromarray(np.zeros((1024, 2048, 3), dtype=np.uint8)).save(file) - - def _make_regular_target(file): - PIL.Image.fromarray(np.zeros((1024, 2048), dtype=np.uint8)).save(file) - - def _make_color_target(file): - PIL.Image.fromarray(np.zeros((1024, 2048, 4), dtype=np.uint8)).save(file) - - def _make_polygon_target(file): - polygon_example = { - 'imgHeight': 1024, - 'imgWidth': 2048, - 'objects': [{'label': 'sky', - 'polygon': [[1241, 0], [1234, 156], - [1478, 197], [1611, 172], - [1606, 0]]}, - {'label': 'road', - 'polygon': [[0, 448], [1331, 274], - [1473, 265], [2047, 605], - [2047, 1023], [0, 1023]]}]} - with open(file, 'w') as outfile: - json.dump(polygon_example, outfile) - - with get_tmp_dir() as tmp_dir: - - for mode in ['Coarse', 'Fine']: - gt_dir = os.path.join(tmp_dir, 'gt%s' % mode) - os.makedirs(gt_dir) - - if mode == 'Coarse': - splits = ['train', 'train_extra', 'val'] - else: - splits = ['train', 'test', 'val'] - - for split in splits: - split_dir = os.path.join(gt_dir, split) - os.makedirs(split_dir) - for city in ['bochum', 'bremen']: - city_dir = os.path.join(split_dir, city) - os.makedirs(city_dir) - _make_color_target(os.path.join(city_dir, - '{city}_000000_000000_gt{mode}_color.png'.format( - city=city, mode=mode))) - _make_regular_target(os.path.join(city_dir, - '{city}_000000_000000_gt{mode}_instanceIds.png'.format( - city=city, mode=mode))) - _make_regular_target(os.path.join(city_dir, - '{city}_000000_000000_gt{mode}_labelIds.png'.format( - city=city, mode=mode))) - _make_polygon_target(os.path.join(city_dir, - '{city}_000000_000000_gt{mode}_polygons.json'.format( - city=city, mode=mode))) - - # leftImg8bit dataset - leftimg_dir = os.path.join(tmp_dir, 'leftImg8bit') - os.makedirs(leftimg_dir) - for split in ['test', 'train_extra', 'train', 'val']: - split_dir = os.path.join(leftimg_dir, split) - os.makedirs(split_dir) - for city in ['bochum', 'bremen']: - city_dir = os.path.join(split_dir, city) - os.makedirs(city_dir) - _make_image(os.path.join(city_dir, - '{city}_000000_000000_leftImg8bit.png'.format(city=city))) - - yield tmp_dir - - @contextlib.contextmanager def svhn_root(): import scipy.io as sio @@ -258,3 +225,182 @@ def _make_mat(file): _make_mat(os.path.join(root, "extra_32x32.mat")) yield root + + +@contextlib.contextmanager +def places365_root(split="train-standard", small=False): + VARIANTS = { + "train-standard": "standard", + "train-challenge": "challenge", + "val": "standard", + } + # {split: file} + DEVKITS = { + "train-standard": "filelist_places365-standard.tar", + "train-challenge": "filelist_places365-challenge.tar", + "val": "filelist_places365-standard.tar", + } + CATEGORIES = "categories_places365.txt" + # {split: file} + FILE_LISTS = { + "train-standard": "places365_train_standard.txt", + "train-challenge": "places365_train_challenge.txt", + "val": "places365_train_standard.txt", + } + # {(split, small): (archive, folder_default, folder_renamed)} + IMAGES = { + ("train-standard", False): ("train_large_places365standard.tar", "data_large", "data_large_standard"), + ("train-challenge", False): ("train_large_places365challenge.tar", "data_large", "data_large_challenge"), + ("val", False): ("val_large.tar", "val_large", "val_large"), + ("train-standard", True): ("train_256_places365standard.tar", "data_256", "data_256_standard"), + ("train-challenge", True): ("train_256_places365challenge.tar", "data_256", "data_256_challenge"), + ("val", True): ("val_256.tar", "val_256", "val_256"), + } + + # (class, idx) + CATEGORIES_CONTENT = (("/a/airfield", 0), ("/a/apartment_building/outdoor", 8), ("/b/badlands", 30)) + # (file, idx) + FILE_LIST_CONTENT = ( + ("Places365_val_00000001.png", 0), + *((f"{category}/Places365_train_00000001.png", idx) for category, idx in CATEGORIES_CONTENT), + ) + + def mock_target(attr, partial="torchvision.datasets.places365.Places365"): + return f"{partial}.{attr}" + + def make_txt(root, name, seq): + file = os.path.join(root, name) + with open(file, "w") as fh: + for string, idx in seq: + fh.write(f"{string} {idx}\n") + return name, compute_md5(file) + + def make_categories_txt(root, name): + return make_txt(root, name, CATEGORIES_CONTENT) + + def make_file_list_txt(root, name): + return make_txt(root, name, FILE_LIST_CONTENT) + + def make_image(file, size): + os.makedirs(os.path.dirname(file), exist_ok=True) + PIL.Image.fromarray(np.zeros((*size, 3), dtype=np.uint8)).save(file) + + def make_devkit_archive(stack, root, split): + archive = DEVKITS[split] + files = [] + + meta = make_categories_txt(root, CATEGORIES) + mock_class_attribute(stack, mock_target("_CATEGORIES_META"), meta) + files.append(meta[0]) + + meta = {split: make_file_list_txt(root, FILE_LISTS[split])} + mock_class_attribute(stack, mock_target("_FILE_LIST_META"), meta) + files.extend([item[0] for item in meta.values()]) + + meta = {VARIANTS[split]: make_tar(root, archive, *files)} + mock_class_attribute(stack, mock_target("_DEVKIT_META"), meta) + + def make_images_archive(stack, root, split, small): + archive, folder_default, folder_renamed = IMAGES[(split, small)] + + image_size = (256, 256) if small else (512, random.randint(512, 1024)) + files, idcs = zip(*FILE_LIST_CONTENT) + images = [file.lstrip("/").replace("/", os.sep) for file in files] + for image in images: + make_image(os.path.join(root, folder_default, image), image_size) + + meta = {(split, small): make_tar(root, archive, folder_default)} + mock_class_attribute(stack, mock_target("_IMAGES_META"), meta) + + return [(os.path.join(root, folder_renamed, image), idx) for image, idx in zip(images, idcs)] + + with contextlib.ExitStack() as stack, get_tmp_dir() as root: + make_devkit_archive(stack, root, split) + class_to_idx = dict(CATEGORIES_CONTENT) + classes = list(class_to_idx.keys()) + + data = {"class_to_idx": class_to_idx, "classes": classes} + data["imgs"] = make_images_archive(stack, root, split, small) + + clean_dir(root, ".tar$") + + yield root, data + + +@contextlib.contextmanager +def stl10_root(_extracted=False): + CLASS_NAMES = ("airplane", "bird") + ARCHIVE_NAME = "stl10_binary" + NUM_FOLDS = 10 + + def mock_target(attr, partial="torchvision.datasets.stl10.STL10"): + return f"{partial}.{attr}" + + def make_binary_file(num_elements, root, name): + file = os.path.join(root, name) + np.zeros(num_elements, dtype=np.uint8).tofile(file) + return name, compute_md5(file) + + def make_image_file(num_images, root, name, num_channels=3, height=96, width=96): + return make_binary_file(num_images * num_channels * height * width, root, name) + + def make_label_file(num_images, root, name): + return make_binary_file(num_images, root, name) + + def make_class_names_file(root, name="class_names.txt"): + with open(os.path.join(root, name), "w") as fh: + for name in CLASS_NAMES: + fh.write(f"{name}\n") + + def make_fold_indices_file(root): + offset = 0 + with open(os.path.join(root, "fold_indices.txt"), "w") as fh: + for fold in range(NUM_FOLDS): + line = " ".join([str(idx) for idx in range(offset, offset + fold + 1)]) + fh.write(f"{line}\n") + offset += fold + 1 + + return tuple(range(1, NUM_FOLDS + 1)) + + def make_train_files(stack, root, num_unlabeled_images=1): + num_images_in_fold = make_fold_indices_file(root) + num_train_images = sum(num_images_in_fold) + + train_list = [ + list(make_image_file(num_train_images, root, "train_X.bin")), + list(make_label_file(num_train_images, root, "train_y.bin")), + list(make_image_file(1, root, "unlabeled_X.bin")) + ] + mock_class_attribute(stack, target=mock_target("train_list"), new=train_list) + + return num_images_in_fold, dict(train=num_train_images, unlabeled=num_unlabeled_images) + + def make_test_files(stack, root, num_images=2): + test_list = [ + list(make_image_file(num_images, root, "test_X.bin")), + list(make_label_file(num_images, root, "test_y.bin")), + ] + mock_class_attribute(stack, target=mock_target("test_list"), new=test_list) + + return dict(test=num_images) + + def make_archive(stack, root, name): + archive, md5 = make_tar(root, name, name, compression="gz") + mock_class_attribute(stack, target=mock_target("tgz_md5"), new=md5) + return archive + + with contextlib.ExitStack() as stack, get_tmp_dir() as root: + archive_folder = os.path.join(root, ARCHIVE_NAME) + os.mkdir(archive_folder) + + num_images_in_folds, num_images_in_split = make_train_files(stack, archive_folder) + num_images_in_split.update(make_test_files(stack, archive_folder)) + + make_class_names_file(archive_folder) + + archive = make_archive(stack, root, ARCHIVE_NAME) + + dir_util.remove_tree(archive_folder) + data = dict(num_images_in_folds=num_images_in_folds, num_images_in_split=num_images_in_split, archive=archive) + + yield root, data diff --git a/test/test_backbone_utils.py b/test/test_backbone_utils.py index 41d54514568..7ee1aed1459 100644 --- a/test/test_backbone_utils.py +++ b/test/test_backbone_utils.py @@ -15,11 +15,11 @@ def test_resnet18_fpn_backbone(self): x = torch.rand(1, 3, 300, 300, dtype=self.dtype, device=device) resnet18_fpn = resnet_fpn_backbone(backbone_name='resnet18', pretrained=False) y = resnet18_fpn(x) - self.assertEqual(list(y.keys()), [0, 1, 2, 3, 'pool']) + self.assertEqual(list(y.keys()), ['0', '1', '2', '3', 'pool']) def test_resnet50_fpn_backbone(self): device = torch.device('cpu') x = torch.rand(1, 3, 300, 300, dtype=self.dtype, device=device) resnet50_fpn = resnet_fpn_backbone(backbone_name='resnet50', pretrained=False) y = resnet50_fpn(x) - self.assertEqual(list(y.keys()), [0, 1, 2, 3, 'pool']) + self.assertEqual(list(y.keys()), ['0', '1', '2', '3', 'pool']) diff --git a/test/test_cpp_models.py b/test/test_cpp_models.py index b6654a0278d..6deb5d79739 100644 --- a/test/test_cpp_models.py +++ b/test/test_cpp_models.py @@ -25,7 +25,8 @@ def process_model(model, tensor, func, name): def read_image1(): - image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'assets', 'grace_hopper_517x606.jpg') + image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'assets', 'encode_jpeg', + 'grace_hopper_517x606.jpg') image = Image.open(image_path) image = image.resize((224, 224)) x = F.to_tensor(image) @@ -33,7 +34,8 @@ def read_image1(): def read_image2(): - image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'assets', 'grace_hopper_517x606.jpg') + image_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'assets', 'encode_jpeg', + 'grace_hopper_517x606.jpg') image = Image.open(image_path) image = image.resize((299, 299)) x = F.to_tensor(image) diff --git a/test/test_datasets.py b/test/test_datasets.py index 2410f18de09..167177deb30 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -1,15 +1,32 @@ +import contextlib import sys import os import unittest -import mock +from unittest import mock import numpy as np import PIL from PIL import Image from torch._utils_internal import get_file_path_2 import torchvision +from torchvision.datasets import utils from common_utils import get_tmp_dir -from fakedata_generation import mnist_root, cifar_root, imagenet_root, \ - cityscapes_root, svhn_root +from fakedata_generation import svhn_root, places365_root, widerface_root, stl10_root +import xml.etree.ElementTree as ET +from urllib.request import Request, urlopen +import itertools +import datasets_utils +import pathlib +import pickle +from torchvision import datasets +import torch +import shutil +import json +import random +import bz2 +import torch.nn.functional as F +import string +import io +import zipfile try: @@ -18,8 +35,14 @@ except ImportError: HAS_SCIPY = False +try: + import av + HAS_PYAV = True +except ImportError: + HAS_PYAV = False -class Tester(unittest.TestCase): + +class DatasetTestcase(unittest.TestCase): def generic_classification_dataset_test(self, dataset, num_images=1): self.assertEqual(len(dataset), num_images) img, target = dataset[0] @@ -32,170 +55,8 @@ def generic_segmentation_dataset_test(self, dataset, num_images=1): self.assertTrue(isinstance(img, PIL.Image.Image)) self.assertTrue(isinstance(target, PIL.Image.Image)) - def test_imagefolder(self): - # TODO: create the fake data on-the-fly - FAKEDATA_DIR = get_file_path_2( - os.path.dirname(os.path.abspath(__file__)), 'assets', 'fakedata') - - with get_tmp_dir(src=os.path.join(FAKEDATA_DIR, 'imagefolder')) as root: - classes = sorted(['a', 'b']) - class_a_image_files = [os.path.join(root, 'a', file) - for file in ('a1.png', 'a2.png', 'a3.png')] - class_b_image_files = [os.path.join(root, 'b', file) - for file in ('b1.png', 'b2.png', 'b3.png', 'b4.png')] - dataset = torchvision.datasets.ImageFolder(root, loader=lambda x: x) - - # test if all classes are present - self.assertEqual(classes, sorted(dataset.classes)) - - # test if combination of classes and class_to_index functions correctly - for cls in classes: - self.assertEqual(cls, dataset.classes[dataset.class_to_idx[cls]]) - - # test if all images were detected correctly - class_a_idx = dataset.class_to_idx['a'] - class_b_idx = dataset.class_to_idx['b'] - imgs_a = [(img_file, class_a_idx) for img_file in class_a_image_files] - imgs_b = [(img_file, class_b_idx) for img_file in class_b_image_files] - imgs = sorted(imgs_a + imgs_b) - self.assertEqual(imgs, dataset.imgs) - - # test if the datasets outputs all images correctly - outputs = sorted([dataset[i] for i in range(len(dataset))]) - self.assertEqual(imgs, outputs) - - # redo all tests with specified valid image files - dataset = torchvision.datasets.ImageFolder(root, loader=lambda x: x, - is_valid_file=lambda x: '3' in x) - self.assertEqual(classes, sorted(dataset.classes)) - - class_a_idx = dataset.class_to_idx['a'] - class_b_idx = dataset.class_to_idx['b'] - imgs_a = [(img_file, class_a_idx) for img_file in class_a_image_files - if '3' in img_file] - imgs_b = [(img_file, class_b_idx) for img_file in class_b_image_files - if '3' in img_file] - imgs = sorted(imgs_a + imgs_b) - self.assertEqual(imgs, dataset.imgs) - - outputs = sorted([dataset[i] for i in range(len(dataset))]) - self.assertEqual(imgs, outputs) - - @mock.patch('torchvision.datasets.mnist.download_and_extract_archive') - def test_mnist(self, mock_download_extract): - num_examples = 30 - with mnist_root(num_examples, "MNIST") as root: - dataset = torchvision.datasets.MNIST(root, download=True) - self.generic_classification_dataset_test(dataset, num_images=num_examples) - img, target = dataset[0] - self.assertEqual(dataset.class_to_idx[dataset.classes[0]], target) - - @mock.patch('torchvision.datasets.mnist.download_and_extract_archive') - def test_kmnist(self, mock_download_extract): - num_examples = 30 - with mnist_root(num_examples, "KMNIST") as root: - dataset = torchvision.datasets.KMNIST(root, download=True) - self.generic_classification_dataset_test(dataset, num_images=num_examples) - img, target = dataset[0] - self.assertEqual(dataset.class_to_idx[dataset.classes[0]], target) - - @mock.patch('torchvision.datasets.mnist.download_and_extract_archive') - def test_fashionmnist(self, mock_download_extract): - num_examples = 30 - with mnist_root(num_examples, "FashionMNIST") as root: - dataset = torchvision.datasets.FashionMNIST(root, download=True) - self.generic_classification_dataset_test(dataset, num_images=num_examples) - img, target = dataset[0] - self.assertEqual(dataset.class_to_idx[dataset.classes[0]], target) - - @mock.patch('torchvision.datasets.imagenet._verify_archive') - @unittest.skipIf(not HAS_SCIPY, "scipy unavailable") - def test_imagenet(self, mock_verify): - with imagenet_root() as root: - dataset = torchvision.datasets.ImageNet(root, split='train') - self.generic_classification_dataset_test(dataset) - - dataset = torchvision.datasets.ImageNet(root, split='val') - self.generic_classification_dataset_test(dataset) - - @mock.patch('torchvision.datasets.cifar.check_integrity') - @mock.patch('torchvision.datasets.cifar.CIFAR10._check_integrity') - def test_cifar10(self, mock_ext_check, mock_int_check): - mock_ext_check.return_value = True - mock_int_check.return_value = True - with cifar_root('CIFAR10') as root: - dataset = torchvision.datasets.CIFAR10(root, train=True, download=True) - self.generic_classification_dataset_test(dataset, num_images=5) - img, target = dataset[0] - self.assertEqual(dataset.class_to_idx[dataset.classes[0]], target) - - dataset = torchvision.datasets.CIFAR10(root, train=False, download=True) - self.generic_classification_dataset_test(dataset) - img, target = dataset[0] - self.assertEqual(dataset.class_to_idx[dataset.classes[0]], target) - - @mock.patch('torchvision.datasets.cifar.check_integrity') - @mock.patch('torchvision.datasets.cifar.CIFAR10._check_integrity') - def test_cifar100(self, mock_ext_check, mock_int_check): - mock_ext_check.return_value = True - mock_int_check.return_value = True - with cifar_root('CIFAR100') as root: - dataset = torchvision.datasets.CIFAR100(root, train=True, download=True) - self.generic_classification_dataset_test(dataset) - img, target = dataset[0] - self.assertEqual(dataset.class_to_idx[dataset.classes[0]], target) - - dataset = torchvision.datasets.CIFAR100(root, train=False, download=True) - self.generic_classification_dataset_test(dataset) - img, target = dataset[0] - self.assertEqual(dataset.class_to_idx[dataset.classes[0]], target) - - @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows') - def test_cityscapes(self): - with cityscapes_root() as root: - - for mode in ['coarse', 'fine']: - - if mode == 'coarse': - splits = ['train', 'train_extra', 'val'] - else: - splits = ['train', 'val', 'test'] - - for split in splits: - for target_type in ['semantic', 'instance']: - dataset = torchvision.datasets.Cityscapes(root, split=split, - target_type=target_type, mode=mode) - self.generic_segmentation_dataset_test(dataset, num_images=2) - - color_dataset = torchvision.datasets.Cityscapes(root, split=split, - target_type='color', mode=mode) - color_img, color_target = color_dataset[0] - self.assertTrue(isinstance(color_img, PIL.Image.Image)) - self.assertTrue(np.array(color_target).shape[2] == 4) - - polygon_dataset = torchvision.datasets.Cityscapes(root, split=split, - target_type='polygon', mode=mode) - polygon_img, polygon_target = polygon_dataset[0] - self.assertTrue(isinstance(polygon_img, PIL.Image.Image)) - self.assertTrue(isinstance(polygon_target, dict)) - self.assertTrue(isinstance(polygon_target['imgHeight'], int)) - self.assertTrue(isinstance(polygon_target['objects'], list)) - - # Test multiple target types - targets_combo = ['semantic', 'polygon', 'color'] - multiple_types_dataset = torchvision.datasets.Cityscapes(root, split=split, - target_type=targets_combo, - mode=mode) - output = multiple_types_dataset[0] - self.assertTrue(isinstance(output, tuple)) - self.assertTrue(len(output) == 2) - self.assertTrue(isinstance(output[0], PIL.Image.Image)) - self.assertTrue(isinstance(output[1], tuple)) - self.assertTrue(len(output[1]) == 3) - self.assertTrue(isinstance(output[1][0], PIL.Image.Image)) # semantic - self.assertTrue(isinstance(output[1][1], dict)) # polygon - self.assertTrue(isinstance(output[1][2], PIL.Image.Image)) # color +class Tester(DatasetTestcase): @mock.patch('torchvision.datasets.SVHN._check_integrity') @unittest.skipIf(not HAS_SCIPY, "scipy unavailable") def test_svhn(self, mock_check): @@ -210,6 +71,1671 @@ def test_svhn(self, mock_check): dataset = torchvision.datasets.SVHN(root, split="extra") self.generic_classification_dataset_test(dataset, num_images=2) + def test_places365(self): + for split, small in itertools.product(("train-standard", "train-challenge", "val"), (False, True)): + with places365_root(split=split, small=small) as places365: + root, data = places365 + + dataset = torchvision.datasets.Places365(root, split=split, small=small, download=True) + self.generic_classification_dataset_test(dataset, num_images=len(data["imgs"])) + + def test_places365_transforms(self): + expected_image = "image" + expected_target = "target" + + def transform(image): + return expected_image + + def target_transform(target): + return expected_target + + with places365_root() as places365: + root, data = places365 + + dataset = torchvision.datasets.Places365( + root, transform=transform, target_transform=target_transform, download=True + ) + actual_image, actual_target = dataset[0] + + self.assertEqual(actual_image, expected_image) + self.assertEqual(actual_target, expected_target) + + def test_places365_devkit_download(self): + for split in ("train-standard", "train-challenge", "val"): + with self.subTest(split=split): + with places365_root(split=split) as places365: + root, data = places365 + + dataset = torchvision.datasets.Places365(root, split=split, download=True) + + with self.subTest("classes"): + self.assertSequenceEqual(dataset.classes, data["classes"]) + + with self.subTest("class_to_idx"): + self.assertDictEqual(dataset.class_to_idx, data["class_to_idx"]) + + with self.subTest("imgs"): + self.assertSequenceEqual(dataset.imgs, data["imgs"]) + + def test_places365_devkit_no_download(self): + for split in ("train-standard", "train-challenge", "val"): + with self.subTest(split=split): + with places365_root(split=split) as places365: + root, data = places365 + + with self.assertRaises(RuntimeError): + torchvision.datasets.Places365(root, split=split, download=False) + + def test_places365_images_download(self): + for split, small in itertools.product(("train-standard", "train-challenge", "val"), (False, True)): + with self.subTest(split=split, small=small): + with places365_root(split=split, small=small) as places365: + root, data = places365 + + dataset = torchvision.datasets.Places365(root, split=split, small=small, download=True) + + assert all(os.path.exists(item[0]) for item in dataset.imgs) + + def test_places365_images_download_preexisting(self): + split = "train-standard" + small = False + images_dir = "data_large_standard" + + with places365_root(split=split, small=small) as places365: + root, data = places365 + os.mkdir(os.path.join(root, images_dir)) + + with self.assertRaises(RuntimeError): + torchvision.datasets.Places365(root, split=split, small=small, download=True) + + def test_places365_repr_smoke(self): + with places365_root() as places365: + root, data = places365 + + dataset = torchvision.datasets.Places365(root, download=True) + self.assertIsInstance(repr(dataset), str) + + +class STL10Tester(DatasetTestcase): + @contextlib.contextmanager + def mocked_root(self): + with stl10_root() as (root, data): + yield root, data + + @contextlib.contextmanager + def mocked_dataset(self, pre_extract=False, download=True, **kwargs): + with self.mocked_root() as (root, data): + if pre_extract: + utils.extract_archive(os.path.join(root, data["archive"])) + dataset = torchvision.datasets.STL10(root, download=download, **kwargs) + yield dataset, data + + def test_not_found(self): + with self.assertRaises(RuntimeError): + with self.mocked_dataset(download=False): + pass + + def test_splits(self): + for split in ('train', 'train+unlabeled', 'unlabeled', 'test'): + with self.mocked_dataset(split=split) as (dataset, data): + num_images = sum([data["num_images_in_split"][part] for part in split.split("+")]) + self.generic_classification_dataset_test(dataset, num_images=num_images) + + def test_folds(self): + for fold in range(10): + with self.mocked_dataset(split="train", folds=fold) as (dataset, data): + num_images = data["num_images_in_folds"][fold] + self.assertEqual(len(dataset), num_images) + + def test_invalid_folds1(self): + with self.assertRaises(ValueError): + with self.mocked_dataset(folds=10): + pass + + def test_invalid_folds2(self): + with self.assertRaises(ValueError): + with self.mocked_dataset(folds="0"): + pass + + def test_transforms(self): + expected_image = "image" + expected_target = "target" + + def transform(image): + return expected_image + + def target_transform(target): + return expected_target + + with self.mocked_dataset(transform=transform, target_transform=target_transform) as (dataset, _): + actual_image, actual_target = dataset[0] + + self.assertEqual(actual_image, expected_image) + self.assertEqual(actual_target, expected_target) + + def test_unlabeled(self): + with self.mocked_dataset(split="unlabeled") as (dataset, _): + labels = [dataset[idx][1] for idx in range(len(dataset))] + self.assertTrue(all([label == -1 for label in labels])) + + @unittest.mock.patch("torchvision.datasets.stl10.download_and_extract_archive") + def test_download_preexisting(self, mock): + with self.mocked_dataset(pre_extract=True) as (dataset, data): + mock.assert_not_called() + + def test_repr_smoke(self): + with self.mocked_dataset() as (dataset, _): + self.assertIsInstance(repr(dataset), str) + + +class Caltech101TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Caltech101 + FEATURE_TYPES = (PIL.Image.Image, (int, np.ndarray, tuple)) + + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + target_type=("category", "annotation", ["category", "annotation"]) + ) + REQUIRED_PACKAGES = ("scipy",) + + def inject_fake_data(self, tmpdir, config): + root = pathlib.Path(tmpdir) / "caltech101" + images = root / "101_ObjectCategories" + annotations = root / "Annotations" + + categories = (("Faces", "Faces_2"), ("helicopter", "helicopter"), ("ying_yang", "ying_yang")) + num_images_per_category = 2 + + for image_category, annotation_category in categories: + datasets_utils.create_image_folder( + root=images, + name=image_category, + file_name_fn=lambda idx: f"image_{idx + 1:04d}.jpg", + num_examples=num_images_per_category, + ) + self._create_annotation_folder( + root=annotations, + name=annotation_category, + file_name_fn=lambda idx: f"annotation_{idx + 1:04d}.mat", + num_examples=num_images_per_category, + ) + + # This is included in the original archive, but is removed by the dataset. Thus, an empty directory suffices. + os.makedirs(images / "BACKGROUND_Google") + + return num_images_per_category * len(categories) + + def _create_annotation_folder(self, root, name, file_name_fn, num_examples): + root = pathlib.Path(root) / name + os.makedirs(root) + + for idx in range(num_examples): + self._create_annotation_file(root, file_name_fn(idx)) + + def _create_annotation_file(self, root, name): + mdict = dict(obj_contour=torch.rand((2, torch.randint(3, 6, size=())), dtype=torch.float64).numpy()) + datasets_utils.lazy_importer.scipy.io.savemat(str(pathlib.Path(root) / name), mdict) + + def test_combined_targets(self): + target_types = ["category", "annotation"] + + individual_targets = [] + for target_type in target_types: + with self.create_dataset(target_type=target_type) as (dataset, _): + _, target = dataset[0] + individual_targets.append(target) + + with self.create_dataset(target_type=target_types) as (dataset, _): + _, combined_targets = dataset[0] + + actual = len(individual_targets) + expected = len(combined_targets) + self.assertEqual( + actual, + expected, + f"The number of the returned combined targets does not match the the number targets if requested " + f"individually: {actual} != {expected}", + ) + + for target_type, combined_target, individual_target in zip(target_types, combined_targets, individual_targets): + with self.subTest(target_type=target_type): + actual = type(combined_target) + expected = type(individual_target) + self.assertIs( + actual, + expected, + f"Type of the combined target does not match the type of the corresponding individual target: " + f"{actual} is not {expected}", + ) + + +class Caltech256TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Caltech256 + + def inject_fake_data(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) / "caltech256" / "256_ObjectCategories" + + categories = ((1, "ak47"), (127, "laptop-101"), (257, "clutter")) + num_images_per_category = 2 + + for idx, category in categories: + datasets_utils.create_image_folder( + tmpdir, + name=f"{idx:03d}.{category}", + file_name_fn=lambda image_idx: f"{idx:03d}_{image_idx + 1:04d}.jpg", + num_examples=num_images_per_category, + ) + + return num_images_per_category * len(categories) + + +class WIDERFaceTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.WIDERFace + FEATURE_TYPES = (PIL.Image.Image, (dict, type(None))) # test split returns None as target + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=('train', 'val', 'test')) + + def inject_fake_data(self, tmpdir, config): + widerface_dir = pathlib.Path(tmpdir) / 'widerface' + annotations_dir = widerface_dir / 'wider_face_split' + os.makedirs(annotations_dir) + + split_to_idx = split_to_num_examples = { + "train": 1, + "val": 2, + "test": 3, + } + + # We need to create all folders regardless of the split in config + for split in ('train', 'val', 'test'): + split_idx = split_to_idx[split] + num_examples = split_to_num_examples[split] + + datasets_utils.create_image_folder( + root=tmpdir, + name=widerface_dir / f'WIDER_{split}' / 'images' / '0--Parade', + file_name_fn=lambda image_idx: f"0_Parade_marchingband_1_{split_idx + image_idx}.jpg", + num_examples=num_examples, + ) + + annotation_file_name = { + 'train': annotations_dir / 'wider_face_train_bbx_gt.txt', + 'val': annotations_dir / 'wider_face_val_bbx_gt.txt', + 'test': annotations_dir / 'wider_face_test_filelist.txt', + }[split] + + annotation_content = { + "train": "".join( + f"0--Parade/0_Parade_marchingband_1_{split_idx + image_idx}.jpg\n1\n449 330 122 149 0 0 0 0 0 0\n" + for image_idx in range(num_examples) + ), + "val": "".join( + f"0--Parade/0_Parade_marchingband_1_{split_idx + image_idx}.jpg\n1\n501 160 285 443 0 0 0 0 0 0\n" + for image_idx in range(num_examples) + ), + "test": "".join( + f"0--Parade/0_Parade_marchingband_1_{split_idx + image_idx}.jpg\n" + for image_idx in range(num_examples) + ), + }[split] + + with open(annotation_file_name, "w") as annotation_file: + annotation_file.write(annotation_content) + + return split_to_num_examples[config["split"]] + + +class CityScapesTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Cityscapes + TARGET_TYPES = ( + "instance", + "semantic", + "polygon", + "color", + ) + ADDITIONAL_CONFIGS = ( + *datasets_utils.combinations_grid( + mode=("fine",), split=("train", "test", "val"), target_type=TARGET_TYPES + ), + *datasets_utils.combinations_grid( + mode=("coarse",), + split=("train", "train_extra", "val"), + target_type=TARGET_TYPES, + ), + ) + FEATURE_TYPES = (PIL.Image.Image, (dict, PIL.Image.Image)) + + def inject_fake_data(self, tmpdir, config): + + tmpdir = pathlib.Path(tmpdir) + + mode_to_splits = { + "Coarse": ["train", "train_extra", "val"], + "Fine": ["train", "test", "val"], + } + + if config["split"] == "train": # just for coverage of the number of samples + cities = ["bochum", "bremen"] + else: + cities = ["bochum"] + + polygon_target = { + "imgHeight": 1024, + "imgWidth": 2048, + "objects": [ + { + "label": "sky", + "polygon": [ + [1241, 0], + [1234, 156], + [1478, 197], + [1611, 172], + [1606, 0], + ], + }, + { + "label": "road", + "polygon": [ + [0, 448], + [1331, 274], + [1473, 265], + [2047, 605], + [2047, 1023], + [0, 1023], + ], + }, + ], + } + + for mode in ["Coarse", "Fine"]: + gt_dir = tmpdir / f"gt{mode}" + for split in mode_to_splits[mode]: + for city in cities: + def make_image(name, size=10): + datasets_utils.create_image_folder( + root=gt_dir / split, + name=city, + file_name_fn=lambda _: name, + size=size, + num_examples=1, + ) + make_image(f"{city}_000000_000000_gt{mode}_instanceIds.png") + make_image(f"{city}_000000_000000_gt{mode}_labelIds.png") + make_image(f"{city}_000000_000000_gt{mode}_color.png", size=(4, 10, 10)) + + polygon_target_name = gt_dir / split / city / f"{city}_000000_000000_gt{mode}_polygons.json" + with open(polygon_target_name, "w") as outfile: + json.dump(polygon_target, outfile) + + # Create leftImg8bit folder + for split in ['test', 'train_extra', 'train', 'val']: + for city in cities: + datasets_utils.create_image_folder( + root=tmpdir / "leftImg8bit" / split, + name=city, + file_name_fn=lambda _: f"{city}_000000_000000_leftImg8bit.png", + num_examples=1, + ) + + info = {'num_examples': len(cities)} + if config['target_type'] == 'polygon': + info['expected_polygon_target'] = polygon_target + return info + + def test_combined_targets(self): + target_types = ['semantic', 'polygon', 'color'] + + with self.create_dataset(target_type=target_types) as (dataset, _): + output = dataset[0] + self.assertTrue(isinstance(output, tuple)) + self.assertTrue(len(output) == 2) + self.assertTrue(isinstance(output[0], PIL.Image.Image)) + self.assertTrue(isinstance(output[1], tuple)) + self.assertTrue(len(output[1]) == 3) + self.assertTrue(isinstance(output[1][0], PIL.Image.Image)) # semantic + self.assertTrue(isinstance(output[1][1], dict)) # polygon + self.assertTrue(isinstance(output[1][2], PIL.Image.Image)) # color + + def test_feature_types_target_color(self): + with self.create_dataset(target_type='color') as (dataset, _): + color_img, color_target = dataset[0] + self.assertTrue(isinstance(color_img, PIL.Image.Image)) + self.assertTrue(np.array(color_target).shape[2] == 4) + + def test_feature_types_target_polygon(self): + with self.create_dataset(target_type='polygon') as (dataset, info): + polygon_img, polygon_target = dataset[0] + self.assertTrue(isinstance(polygon_img, PIL.Image.Image)) + self.assertEqual(polygon_target, info['expected_polygon_target']) + + +class ImageNetTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.ImageNet + REQUIRED_PACKAGES = ('scipy',) + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=('train', 'val')) + + def inject_fake_data(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) + + wnid = 'n01234567' + if config['split'] == 'train': + num_examples = 3 + datasets_utils.create_image_folder( + root=tmpdir, + name=tmpdir / 'train' / wnid / wnid, + file_name_fn=lambda image_idx: f"{wnid}_{image_idx}.JPEG", + num_examples=num_examples, + ) + else: + num_examples = 1 + datasets_utils.create_image_folder( + root=tmpdir, + name=tmpdir / 'val' / wnid, + file_name_fn=lambda image_ifx: "ILSVRC2012_val_0000000{image_idx}.JPEG", + num_examples=num_examples, + ) + + wnid_to_classes = {wnid: [1]} + torch.save((wnid_to_classes, None), tmpdir / 'meta.bin') + return num_examples + + +class CIFAR10TestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.CIFAR10 + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True, False)) + + _VERSION_CONFIG = dict( + base_folder="cifar-10-batches-py", + train_files=tuple(f"data_batch_{idx}" for idx in range(1, 6)), + test_files=("test_batch",), + labels_key="labels", + meta_file="batches.meta", + num_categories=10, + categories_key="label_names", + ) + + def inject_fake_data(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) / self._VERSION_CONFIG["base_folder"] + os.makedirs(tmpdir) + + num_images_per_file = 1 + for name in itertools.chain(self._VERSION_CONFIG["train_files"], self._VERSION_CONFIG["test_files"]): + self._create_batch_file(tmpdir, name, num_images_per_file) + + categories = self._create_meta_file(tmpdir) + + return dict( + num_examples=num_images_per_file + * len(self._VERSION_CONFIG["train_files"] if config["train"] else self._VERSION_CONFIG["test_files"]), + categories=categories, + ) + + def _create_batch_file(self, root, name, num_images): + data = datasets_utils.create_image_or_video_tensor((num_images, 32 * 32 * 3)) + labels = np.random.randint(0, self._VERSION_CONFIG["num_categories"], size=num_images).tolist() + self._create_binary_file(root, name, {"data": data, self._VERSION_CONFIG["labels_key"]: labels}) + + def _create_meta_file(self, root): + categories = [ + f"{idx:0{len(str(self._VERSION_CONFIG['num_categories'] - 1))}d}" + for idx in range(self._VERSION_CONFIG["num_categories"]) + ] + self._create_binary_file( + root, self._VERSION_CONFIG["meta_file"], {self._VERSION_CONFIG["categories_key"]: categories} + ) + return categories + + def _create_binary_file(self, root, name, content): + with open(pathlib.Path(root) / name, "wb") as fh: + pickle.dump(content, fh) + + def test_class_to_idx(self): + with self.create_dataset() as (dataset, info): + expected = {category: label for label, category in enumerate(info["categories"])} + actual = dataset.class_to_idx + self.assertEqual(actual, expected) + + +class CIFAR100(CIFAR10TestCase): + DATASET_CLASS = datasets.CIFAR100 + + _VERSION_CONFIG = dict( + base_folder="cifar-100-python", + train_files=("train",), + test_files=("test",), + labels_key="fine_labels", + meta_file="meta", + num_categories=100, + categories_key="fine_label_names", + ) + + +class CelebATestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.CelebA + FEATURE_TYPES = (PIL.Image.Image, (torch.Tensor, int, tuple, type(None))) + + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + split=("train", "valid", "test", "all"), + target_type=("attr", "identity", "bbox", "landmarks", ["attr", "identity"]), + ) + + _SPLIT_TO_IDX = dict(train=0, valid=1, test=2) + + def inject_fake_data(self, tmpdir, config): + base_folder = pathlib.Path(tmpdir) / "celeba" + os.makedirs(base_folder) + + num_images, num_images_per_split = self._create_split_txt(base_folder) + + datasets_utils.create_image_folder( + base_folder, "img_align_celeba", lambda idx: f"{idx + 1:06d}.jpg", num_images + ) + attr_names = self._create_attr_txt(base_folder, num_images) + self._create_identity_txt(base_folder, num_images) + self._create_bbox_txt(base_folder, num_images) + self._create_landmarks_txt(base_folder, num_images) + + return dict(num_examples=num_images_per_split[config["split"]], attr_names=attr_names) + + def _create_split_txt(self, root): + num_images_per_split = dict(train=3, valid=2, test=1) + + data = [ + [self._SPLIT_TO_IDX[split]] for split, num_images in num_images_per_split.items() for _ in range(num_images) + ] + self._create_txt(root, "list_eval_partition.txt", data) + + num_images_per_split["all"] = num_images = sum(num_images_per_split.values()) + return num_images, num_images_per_split + + def _create_attr_txt(self, root, num_images): + header = ("5_o_Clock_Shadow", "Young") + data = torch.rand((num_images, len(header))).ge(0.5).int().mul(2).sub(1).tolist() + self._create_txt(root, "list_attr_celeba.txt", data, header=header, add_num_examples=True) + return header + + def _create_identity_txt(self, root, num_images): + data = torch.randint(1, 4, size=(num_images, 1)).tolist() + self._create_txt(root, "identity_CelebA.txt", data) + + def _create_bbox_txt(self, root, num_images): + header = ("x_1", "y_1", "width", "height") + data = torch.randint(10, size=(num_images, len(header))).tolist() + self._create_txt( + root, "list_bbox_celeba.txt", data, header=header, add_num_examples=True, add_image_id_to_header=True + ) + + def _create_landmarks_txt(self, root, num_images): + header = ("lefteye_x", "rightmouth_y") + data = torch.randint(10, size=(num_images, len(header))).tolist() + self._create_txt(root, "list_landmarks_align_celeba.txt", data, header=header, add_num_examples=True) + + def _create_txt(self, root, name, data, header=None, add_num_examples=False, add_image_id_to_header=False): + with open(pathlib.Path(root) / name, "w") as fh: + if add_num_examples: + fh.write(f"{len(data)}\n") + + if header: + if add_image_id_to_header: + header = ("image_id", *header) + fh.write(f"{' '.join(header)}\n") + + for idx, line in enumerate(data, 1): + fh.write(f"{' '.join((f'{idx:06d}.jpg', *[str(value) for value in line]))}\n") + + def test_combined_targets(self): + target_types = ["attr", "identity", "bbox", "landmarks"] + + individual_targets = [] + for target_type in target_types: + with self.create_dataset(target_type=target_type) as (dataset, _): + _, target = dataset[0] + individual_targets.append(target) + + with self.create_dataset(target_type=target_types) as (dataset, _): + _, combined_targets = dataset[0] + + actual = len(individual_targets) + expected = len(combined_targets) + self.assertEqual( + actual, + expected, + f"The number of the returned combined targets does not match the the number targets if requested " + f"individually: {actual} != {expected}", + ) + + for target_type, combined_target, individual_target in zip(target_types, combined_targets, individual_targets): + with self.subTest(target_type=target_type): + actual = type(combined_target) + expected = type(individual_target) + self.assertIs( + actual, + expected, + f"Type of the combined target does not match the type of the corresponding individual target: " + f"{actual} is not {expected}", + ) + + def test_no_target(self): + with self.create_dataset(target_type=[]) as (dataset, _): + _, target = dataset[0] + + self.assertIsNone(target) + + def test_attr_names(self): + with self.create_dataset() as (dataset, info): + self.assertEqual(tuple(dataset.attr_names), info["attr_names"]) + + +class VOCSegmentationTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.VOCSegmentation + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image) + + ADDITIONAL_CONFIGS = ( + *datasets_utils.combinations_grid( + year=[f"20{year:02d}" for year in range(7, 13)], image_set=("train", "val", "trainval") + ), + dict(year="2007", image_set="test"), + dict(year="2007-test", image_set="test"), + ) + + def inject_fake_data(self, tmpdir, config): + year, is_test_set = ( + ("2007", True) + if config["year"] == "2007-test" or config["image_set"] == "test" + else (config["year"], False) + ) + image_set = config["image_set"] + + base_dir = pathlib.Path(tmpdir) + if year == "2011": + base_dir /= "TrainVal" + base_dir = base_dir / "VOCdevkit" / f"VOC{year}" + os.makedirs(base_dir) + + num_images, num_images_per_image_set = self._create_image_set_files(base_dir, "ImageSets", is_test_set) + datasets_utils.create_image_folder(base_dir, "JPEGImages", lambda idx: f"{idx:06d}.jpg", num_images) + + datasets_utils.create_image_folder(base_dir, "SegmentationClass", lambda idx: f"{idx:06d}.png", num_images) + annotation = self._create_annotation_files(base_dir, "Annotations", num_images) + + return dict(num_examples=num_images_per_image_set[image_set], annotation=annotation) + + def _create_image_set_files(self, root, name, is_test_set): + root = pathlib.Path(root) / name + src = pathlib.Path(root) / "Main" + os.makedirs(src, exist_ok=True) + + idcs = dict(train=(0, 1, 2), val=(3, 4), test=(5,)) + idcs["trainval"] = (*idcs["train"], *idcs["val"]) + + for image_set in ("test",) if is_test_set else ("train", "val", "trainval"): + self._create_image_set_file(src, image_set, idcs[image_set]) + + shutil.copytree(src, root / "Segmentation") + + num_images = max(itertools.chain(*idcs.values())) + 1 + num_images_per_image_set = dict([(image_set, len(idcs_)) for image_set, idcs_ in idcs.items()]) + return num_images, num_images_per_image_set + + def _create_image_set_file(self, root, image_set, idcs): + with open(pathlib.Path(root) / f"{image_set}.txt", "w") as fh: + fh.writelines([f"{idx:06d}\n" for idx in idcs]) + + def _create_annotation_files(self, root, name, num_images): + root = pathlib.Path(root) / name + os.makedirs(root) + + for idx in range(num_images): + annotation = self._create_annotation_file(root, f"{idx:06d}.xml") + + return annotation + + def _create_annotation_file(self, root, name): + def add_child(parent, name, text=None): + child = ET.SubElement(parent, name) + child.text = text + return child + + def add_name(obj, name="dog"): + add_child(obj, "name", name) + return name + + def add_bndbox(obj, bndbox=None): + if bndbox is None: + bndbox = {"xmin": "1", "xmax": "2", "ymin": "3", "ymax": "4"} + + obj = add_child(obj, "bndbox") + for name, text in bndbox.items(): + add_child(obj, name, text) + + return bndbox + + annotation = ET.Element("annotation") + obj = add_child(annotation, "object") + data = dict(name=add_name(obj), bndbox=add_bndbox(obj)) + + with open(pathlib.Path(root) / name, "wb") as fh: + fh.write(ET.tostring(annotation)) + + return data + + +class VOCDetectionTestCase(VOCSegmentationTestCase): + DATASET_CLASS = datasets.VOCDetection + FEATURE_TYPES = (PIL.Image.Image, dict) + + def test_annotations(self): + with self.create_dataset() as (dataset, info): + _, target = dataset[0] + + self.assertIn("annotation", target) + annotation = target["annotation"] + + self.assertIn("object", annotation) + objects = annotation["object"] + + self.assertEqual(len(objects), 1) + object = objects[0] + + self.assertEqual(object, info["annotation"]) + + +class CocoDetectionTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.CocoDetection + FEATURE_TYPES = (PIL.Image.Image, list) + + REQUIRED_PACKAGES = ("pycocotools",) + + _IMAGE_FOLDER = "images" + _ANNOTATIONS_FOLDER = "annotations" + _ANNOTATIONS_FILE = "annotations.json" + + def dataset_args(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) + root = tmpdir / self._IMAGE_FOLDER + annotation_file = tmpdir / self._ANNOTATIONS_FOLDER / self._ANNOTATIONS_FILE + return root, annotation_file + + def inject_fake_data(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) + + num_images = 3 + num_annotations_per_image = 2 + + files = datasets_utils.create_image_folder( + tmpdir, name=self._IMAGE_FOLDER, file_name_fn=lambda idx: f"{idx:012d}.jpg", num_examples=num_images + ) + file_names = [file.relative_to(tmpdir / self._IMAGE_FOLDER) for file in files] + + annotation_folder = tmpdir / self._ANNOTATIONS_FOLDER + os.makedirs(annotation_folder) + info = self._create_annotation_file( + annotation_folder, self._ANNOTATIONS_FILE, file_names, num_annotations_per_image + ) + + info["num_examples"] = num_images + return info + + def _create_annotation_file(self, root, name, file_names, num_annotations_per_image): + image_ids = [int(file_name.stem) for file_name in file_names] + images = [dict(file_name=str(file_name), id=id) for file_name, id in zip(file_names, image_ids)] + + annotations, info = self._create_annotations(image_ids, num_annotations_per_image) + self._create_json(root, name, dict(images=images, annotations=annotations)) + + return info + + def _create_annotations(self, image_ids, num_annotations_per_image): + annotations = datasets_utils.combinations_grid( + image_id=image_ids, bbox=([1.0, 2.0, 3.0, 4.0],) * num_annotations_per_image + ) + for id, annotation in enumerate(annotations): + annotation["id"] = id + return annotations, dict() + + def _create_json(self, root, name, content): + file = pathlib.Path(root) / name + with open(file, "w") as fh: + json.dump(content, fh) + return file + + +class CocoCaptionsTestCase(CocoDetectionTestCase): + DATASET_CLASS = datasets.CocoCaptions + + def _create_annotations(self, image_ids, num_annotations_per_image): + captions = [str(idx) for idx in range(num_annotations_per_image)] + annotations = datasets_utils.combinations_grid(image_id=image_ids, caption=captions) + for id, annotation in enumerate(annotations): + annotation["id"] = id + return annotations, dict(captions=captions) + + def test_captions(self): + with self.create_dataset() as (dataset, info): + _, captions = dataset[0] + self.assertEqual(tuple(captions), tuple(info["captions"])) + + +class UCF101TestCase(datasets_utils.VideoDatasetTestCase): + DATASET_CLASS = datasets.UCF101 + + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(fold=(1, 2, 3), train=(True, False)) + + _VIDEO_FOLDER = "videos" + _ANNOTATIONS_FOLDER = "annotations" + + def dataset_args(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) + root = tmpdir / self._VIDEO_FOLDER + annotation_path = tmpdir / self._ANNOTATIONS_FOLDER + return root, annotation_path + + def inject_fake_data(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) + + video_folder = tmpdir / self._VIDEO_FOLDER + os.makedirs(video_folder) + video_files = self._create_videos(video_folder) + + annotations_folder = tmpdir / self._ANNOTATIONS_FOLDER + os.makedirs(annotations_folder) + num_examples = self._create_annotation_files(annotations_folder, video_files, config["fold"], config["train"]) + + return num_examples + + def _create_videos(self, root, num_examples_per_class=3): + def file_name_fn(cls, idx, clips_per_group=2): + return f"v_{cls}_g{(idx // clips_per_group) + 1:02d}_c{(idx % clips_per_group) + 1:02d}.avi" + + video_files = [ + datasets_utils.create_video_folder(root, cls, lambda idx: file_name_fn(cls, idx), num_examples_per_class) + for cls in ("ApplyEyeMakeup", "YoYo") + ] + return [path.relative_to(root) for path in itertools.chain(*video_files)] + + def _create_annotation_files(self, root, video_files, fold, train): + current_videos = random.sample(video_files, random.randrange(1, len(video_files) - 1)) + current_annotation = self._annotation_file_name(fold, train) + self._create_annotation_file(root, current_annotation, current_videos) + + other_videos = set(video_files) - set(current_videos) + other_annotations = [ + self._annotation_file_name(fold, train) for fold, train in itertools.product((1, 2, 3), (True, False)) + ] + other_annotations.remove(current_annotation) + for name in other_annotations: + self._create_annotation_file(root, name, other_videos) + + return len(current_videos) + + def _annotation_file_name(self, fold, train): + return f"{'train' if train else 'test'}list{fold:02d}.txt" + + def _create_annotation_file(self, root, name, video_files): + with open(pathlib.Path(root) / name, "w") as fh: + fh.writelines(f"{file}\n" for file in sorted(video_files)) + + +class LSUNTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.LSUN + + REQUIRED_PACKAGES = ("lmdb",) + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + classes=("train", "test", "val", ["bedroom_train", "church_outdoor_train"]) + ) + + _CATEGORIES = ( + "bedroom", + "bridge", + "church_outdoor", + "classroom", + "conference_room", + "dining_room", + "kitchen", + "living_room", + "restaurant", + "tower", + ) + + def inject_fake_data(self, tmpdir, config): + root = pathlib.Path(tmpdir) + + num_images = 0 + for cls in self._parse_classes(config["classes"]): + num_images += self._create_lmdb(root, cls) + + return num_images + + @contextlib.contextmanager + def create_dataset( + self, + *args, **kwargs + ): + with super().create_dataset(*args, **kwargs) as output: + yield output + # Currently datasets.LSUN caches the keys in the current directory rather than in the root directory. Thus, + # this creates a number of unique _cache_* files in the current directory that will not be removed together + # with the temporary directory + for file in os.listdir(os.getcwd()): + if file.startswith("_cache_"): + os.remove(file) + + def _parse_classes(self, classes): + if not isinstance(classes, str): + return classes + + split = classes + if split == "test": + return [split] + + return [f"{category}_{split}" for category in self._CATEGORIES] + + def _create_lmdb(self, root, cls): + lmdb = datasets_utils.lazy_importer.lmdb + hexdigits_lowercase = string.digits + string.ascii_lowercase[:6] + + folder = f"{cls}_lmdb" + + num_images = torch.randint(1, 4, size=()).item() + format = "png" + files = datasets_utils.create_image_folder(root, folder, lambda idx: f"{idx}.{format}", num_images) + + with lmdb.open(str(root / folder)) as env, env.begin(write=True) as txn: + for file in files: + key = "".join(random.choice(hexdigits_lowercase) for _ in range(40)).encode() + + buffer = io.BytesIO() + Image.open(file).save(buffer, format) + buffer.seek(0) + value = buffer.read() + + txn.put(key, value) + + os.remove(file) + + return num_images + + def test_not_found_or_corrupted(self): + # LSUN does not raise built-in exception, but a custom one. It is expressive enough to not 'cast' it to + # RuntimeError or FileNotFoundError that are normally checked by this test. + with self.assertRaises(datasets_utils.lazy_importer.lmdb.Error): + super().test_not_found_or_corrupted() + + +class Kinetics400TestCase(datasets_utils.VideoDatasetTestCase): + DATASET_CLASS = datasets.Kinetics400 + + def inject_fake_data(self, tmpdir, config): + classes = ("Abseiling", "Zumba") + num_videos_per_class = 2 + + digits = string.ascii_letters + string.digits + "-_" + for cls in classes: + datasets_utils.create_video_folder( + tmpdir, + cls, + lambda _: f"{datasets_utils.create_random_string(11, digits)}.avi", + num_videos_per_class, + ) + + return num_videos_per_class * len(classes) + + +class HMDB51TestCase(datasets_utils.VideoDatasetTestCase): + DATASET_CLASS = datasets.HMDB51 + + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(fold=(1, 2, 3), train=(True, False)) + + _VIDEO_FOLDER = "videos" + _SPLITS_FOLDER = "splits" + _CLASSES = ("brush_hair", "wave") + + def dataset_args(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) + root = tmpdir / self._VIDEO_FOLDER + annotation_path = tmpdir / self._SPLITS_FOLDER + return root, annotation_path + + def inject_fake_data(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) + + video_folder = tmpdir / self._VIDEO_FOLDER + os.makedirs(video_folder) + video_files = self._create_videos(video_folder) + + splits_folder = tmpdir / self._SPLITS_FOLDER + os.makedirs(splits_folder) + num_examples = self._create_split_files(splits_folder, video_files, config["fold"], config["train"]) + + return num_examples + + def _create_videos(self, root, num_examples_per_class=3): + def file_name_fn(cls, idx, clips_per_group=2): + return f"{cls}_{(idx // clips_per_group) + 1:d}_{(idx % clips_per_group) + 1:d}.avi" + + return [ + ( + cls, + datasets_utils.create_video_folder( + root, + cls, + lambda idx: file_name_fn(cls, idx), + num_examples_per_class, + ), + ) + for cls in self._CLASSES + ] + + def _create_split_files(self, root, video_files, fold, train): + num_videos = num_train_videos = 0 + + for cls, videos in video_files: + num_videos += len(videos) + + train_videos = set(random.sample(videos, random.randrange(1, len(videos) - 1))) + num_train_videos += len(train_videos) + + with open(pathlib.Path(root) / f"{cls}_test_split{fold}.txt", "w") as fh: + fh.writelines(f"{file.name} {1 if file in train_videos else 2}\n" for file in videos) + + return num_train_videos if train else (num_videos - num_train_videos) + + +class OmniglotTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Omniglot + + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(background=(True, False)) + + def inject_fake_data(self, tmpdir, config): + target_folder = ( + pathlib.Path(tmpdir) / "omniglot-py" / f"images_{'background' if config['background'] else 'evaluation'}" + ) + os.makedirs(target_folder) + + num_images = 0 + for name in ("Alphabet_of_the_Magi", "Tifinagh"): + num_images += self._create_alphabet_folder(target_folder, name) + + return num_images + + def _create_alphabet_folder(self, root, name): + num_images_total = 0 + for idx in range(torch.randint(1, 4, size=()).item()): + num_images = torch.randint(1, 4, size=()).item() + num_images_total += num_images + + datasets_utils.create_image_folder( + root / name, f"character{idx:02d}", lambda image_idx: f"{image_idx:02d}.png", num_images + ) + + return num_images_total + + +class SBUTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.SBU + FEATURE_TYPES = (PIL.Image.Image, str) + + def inject_fake_data(self, tmpdir, config): + num_images = 3 + + dataset_folder = pathlib.Path(tmpdir) / "dataset" + images = datasets_utils.create_image_folder(tmpdir, "dataset", self._create_file_name, num_images) + + self._create_urls_txt(dataset_folder, images) + self._create_captions_txt(dataset_folder, num_images) + + return num_images + + def _create_file_name(self, idx): + part1 = datasets_utils.create_random_string(10, string.digits) + part2 = datasets_utils.create_random_string(10, string.ascii_lowercase, string.digits[:6]) + return f"{part1}_{part2}.jpg" + + def _create_urls_txt(self, root, images): + with open(root / "SBU_captioned_photo_dataset_urls.txt", "w") as fh: + for image in images: + fh.write( + f"http://static.flickr.com/{datasets_utils.create_random_string(4, string.digits)}/{image.name}\n" + ) + + def _create_captions_txt(self, root, num_images): + with open(root / "SBU_captioned_photo_dataset_captions.txt", "w") as fh: + for _ in range(num_images): + fh.write(f"{datasets_utils.create_random_string(10)}\n") + + +class SEMEIONTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.SEMEION + + def inject_fake_data(self, tmpdir, config): + num_images = 3 + + images = torch.rand(num_images, 256) + labels = F.one_hot(torch.randint(10, size=(num_images,))) + with open(pathlib.Path(tmpdir) / "semeion.data", "w") as fh: + for image, one_hot_labels in zip(images, labels): + image_columns = " ".join([f"{pixel.item():.4f}" for pixel in image]) + labels_columns = " ".join([str(label.item()) for label in one_hot_labels]) + fh.write(f"{image_columns} {labels_columns}\n") + + return num_images + + +class USPSTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.USPS + + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True, False)) + + def inject_fake_data(self, tmpdir, config): + num_images = 2 if config["train"] else 1 + + images = torch.rand(num_images, 256) * 2 - 1 + labels = torch.randint(1, 11, size=(num_images,)) + + with bz2.open(pathlib.Path(tmpdir) / f"usps{'.t' if not config['train'] else ''}.bz2", "w") as fh: + for image, label in zip(images, labels): + line = " ".join((str(label.item()), *[f"{idx}:{pixel:.6f}" for idx, pixel in enumerate(image, 1)])) + fh.write(f"{line}\n".encode()) + + return num_images + + +class SBDatasetTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.SBDataset + FEATURE_TYPES = (PIL.Image.Image, (np.ndarray, PIL.Image.Image)) + + REQUIRED_PACKAGES = ("scipy.io", "scipy.sparse") + + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + image_set=("train", "val", "train_noval"), mode=("boundaries", "segmentation") + ) + + _NUM_CLASSES = 20 + + def inject_fake_data(self, tmpdir, config): + num_images, num_images_per_image_set = self._create_split_files(tmpdir) + + sizes = self._create_target_folder(tmpdir, "cls", num_images) + + datasets_utils.create_image_folder( + tmpdir, "img", lambda idx: f"{self._file_stem(idx)}.jpg", num_images, size=lambda idx: sizes[idx] + ) + + return num_images_per_image_set[config["image_set"]] + + def _create_split_files(self, root): + root = pathlib.Path(root) + + splits = dict(train=(0, 1, 2), train_noval=(0, 2), val=(3,)) + + for split, idcs in splits.items(): + self._create_split_file(root, split, idcs) + + num_images = max(itertools.chain(*splits.values())) + 1 + num_images_per_split = dict([(split, len(idcs)) for split, idcs in splits.items()]) + return num_images, num_images_per_split + + def _create_split_file(self, root, name, idcs): + with open(root / f"{name}.txt", "w") as fh: + fh.writelines(f"{self._file_stem(idx)}\n" for idx in idcs) + + def _create_target_folder(self, root, name, num_images): + io = datasets_utils.lazy_importer.scipy.io + + target_folder = pathlib.Path(root) / name + os.makedirs(target_folder) + + sizes = [torch.randint(1, 4, size=(2,)).tolist() for _ in range(num_images)] + for idx, size in enumerate(sizes): + content = dict( + GTcls=dict(Boundaries=self._create_boundaries(size), Segmentation=self._create_segmentation(size)) + ) + io.savemat(target_folder / f"{self._file_stem(idx)}.mat", content) + + return sizes + + def _create_boundaries(self, size): + sparse = datasets_utils.lazy_importer.scipy.sparse + return [ + [sparse.csc_matrix(torch.randint(0, 2, size=size, dtype=torch.uint8).numpy())] + for _ in range(self._NUM_CLASSES) + ] + + def _create_segmentation(self, size): + return torch.randint(0, self._NUM_CLASSES + 1, size=size, dtype=torch.uint8).numpy() + + def _file_stem(self, idx): + return f"2008_{idx:06d}" + + +class FakeDataTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.FakeData + FEATURE_TYPES = (PIL.Image.Image, int) + + def dataset_args(self, tmpdir, config): + return () + + def inject_fake_data(self, tmpdir, config): + return config["size"] + + def test_not_found_or_corrupted(self): + self.skipTest("The data is generated at creation and thus cannot be non-existent or corrupted.") + + +class PhotoTourTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.PhotoTour + + # The PhotoTour dataset returns examples with different features with respect to the 'train' parameter. Thus, + # we overwrite 'FEATURE_TYPES' with a dummy value to satisfy the initial checks of the base class. Furthermore, we + # overwrite the 'test_feature_types()' method to select the correct feature types before the test is run. + FEATURE_TYPES = () + _TRAIN_FEATURE_TYPES = (torch.Tensor,) + _TEST_FEATURE_TYPES = (torch.Tensor, torch.Tensor, torch.Tensor) + + datasets_utils.combinations_grid(train=(True, False)) + + _NAME = "liberty" + + def dataset_args(self, tmpdir, config): + return tmpdir, self._NAME + + def inject_fake_data(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) + + # In contrast to the original data, the fake images injected here comprise only a single patch. Thus, + # num_images == num_patches. + num_patches = 5 + + image_files = self._create_images(tmpdir, self._NAME, num_patches) + point_ids, info_file = self._create_info_file(tmpdir / self._NAME, num_patches) + num_matches, matches_file = self._create_matches_file(tmpdir / self._NAME, num_patches, point_ids) + + self._create_archive(tmpdir, self._NAME, *image_files, info_file, matches_file) + + return num_patches if config["train"] else num_matches + + def _create_images(self, root, name, num_images): + # The images in the PhotoTour dataset comprises of multiple grayscale patches of 64 x 64 pixels. Thus, the + # smallest fake image is 64 x 64 pixels and comprises a single patch. + return datasets_utils.create_image_folder( + root, name, lambda idx: f"patches{idx:04d}.bmp", num_images, size=(1, 64, 64) + ) + + def _create_info_file(self, root, num_images): + point_ids = torch.randint(num_images, size=(num_images,)).tolist() + + file = root / "info.txt" + with open(file, "w") as fh: + fh.writelines([f"{point_id} 0\n" for point_id in point_ids]) + + return point_ids, file + + def _create_matches_file(self, root, num_patches, point_ids): + lines = [ + f"{patch_id1} {point_ids[patch_id1]} 0 {patch_id2} {point_ids[patch_id2]} 0\n" + for patch_id1, patch_id2 in itertools.combinations(range(num_patches), 2) + ] + + file = root / "m50_100000_100000_0.txt" + with open(file, "w") as fh: + fh.writelines(lines) + + return len(lines), file + + def _create_archive(self, root, name, *files): + archive = root / f"{name}.zip" + with zipfile.ZipFile(archive, "w") as zip: + for file in files: + zip.write(file, arcname=file.relative_to(root)) + + return archive + + @datasets_utils.test_all_configs + def test_feature_types(self, config): + feature_types = self.FEATURE_TYPES + self.FEATURE_TYPES = self._TRAIN_FEATURE_TYPES if config["train"] else self._TEST_FEATURE_TYPES + try: + super().test_feature_types.__wrapped__(self, config) + finally: + self.FEATURE_TYPES = feature_types + + +class Flickr8kTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Flickr8k + + FEATURE_TYPES = (PIL.Image.Image, list) + + _IMAGES_FOLDER = "images" + _ANNOTATIONS_FILE = "captions.html" + + def dataset_args(self, tmpdir, config): + tmpdir = pathlib.Path(tmpdir) + root = tmpdir / self._IMAGES_FOLDER + ann_file = tmpdir / self._ANNOTATIONS_FILE + return str(root), str(ann_file) + + def inject_fake_data(self, tmpdir, config): + num_images = 3 + num_captions_per_image = 3 + + tmpdir = pathlib.Path(tmpdir) + + images = self._create_images(tmpdir, self._IMAGES_FOLDER, num_images) + self._create_annotations_file(tmpdir, self._ANNOTATIONS_FILE, images, num_captions_per_image) + + return dict(num_examples=num_images, captions=self._create_captions(num_captions_per_image)) + + def _create_images(self, root, name, num_images): + return datasets_utils.create_image_folder(root, name, self._image_file_name, num_images) + + def _image_file_name(self, idx): + id = datasets_utils.create_random_string(10, string.digits) + checksum = datasets_utils.create_random_string(10, string.digits, string.ascii_lowercase[:6]) + size = datasets_utils.create_random_string(1, "qwcko") + return f"{id}_{checksum}_{size}.jpg" + + def _create_annotations_file(self, root, name, images, num_captions_per_image): + with open(root / name, "w") as fh: + fh.write("") + for image in (None, *images): + self._add_image(fh, image, num_captions_per_image) + fh.write("
") + + def _add_image(self, fh, image, num_captions_per_image): + fh.write("") + self._add_image_header(fh, image) + fh.write("
    ") + self._add_image_captions(fh, num_captions_per_image) + fh.write("
") + + def _add_image_header(self, fh, image=None): + if image: + url = f"http://www.flickr.com/photos/user/{image.name.split('_')[0]}/" + data = f'{url}' + else: + data = "Image Not Found" + fh.write(f"{data}") + + def _add_image_captions(self, fh, num_captions_per_image): + for caption in self._create_captions(num_captions_per_image): + fh.write(f"
  • {caption}") + + def _create_captions(self, num_captions_per_image): + return [str(idx) for idx in range(num_captions_per_image)] + + def test_captions(self): + with self.create_dataset() as (dataset, info): + _, captions = dataset[0] + self.assertSequenceEqual(captions, info["captions"]) + + +class Flickr30kTestCase(Flickr8kTestCase): + DATASET_CLASS = datasets.Flickr30k + + FEATURE_TYPES = (PIL.Image.Image, list) + + _ANNOTATIONS_FILE = "captions.token" + + def _image_file_name(self, idx): + return f"{idx}.jpg" + + def _create_annotations_file(self, root, name, images, num_captions_per_image): + with open(root / name, "w") as fh: + for image, (idx, caption) in itertools.product( + images, enumerate(self._create_captions(num_captions_per_image)) + ): + fh.write(f"{image.name}#{idx}\t{caption}\n") + + +class MNISTTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.MNIST + + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True, False)) + + _MAGIC_DTYPES = { + torch.uint8: 8, + torch.int8: 9, + torch.int16: 11, + torch.int32: 12, + torch.float32: 13, + torch.float64: 14, + } + + _IMAGES_SIZE = (28, 28) + _IMAGES_DTYPE = torch.uint8 + + _LABELS_SIZE = () + _LABELS_DTYPE = torch.uint8 + + def inject_fake_data(self, tmpdir, config): + raw_dir = pathlib.Path(tmpdir) / self.DATASET_CLASS.__name__ / "raw" + os.makedirs(raw_dir, exist_ok=True) + + num_images = self._num_images(config) + self._create_binary_file( + raw_dir, self._images_file(config), (num_images, *self._IMAGES_SIZE), self._IMAGES_DTYPE + ) + self._create_binary_file( + raw_dir, self._labels_file(config), (num_images, *self._LABELS_SIZE), self._LABELS_DTYPE + ) + return num_images + + def _num_images(self, config): + return 2 if config["train"] else 1 + + def _images_file(self, config): + return f"{self._prefix(config)}-images-idx3-ubyte" + + def _labels_file(self, config): + return f"{self._prefix(config)}-labels-idx1-ubyte" + + def _prefix(self, config): + return "train" if config["train"] else "t10k" + + def _create_binary_file(self, root, filename, size, dtype): + with open(pathlib.Path(root) / filename, "wb") as fh: + for meta in (self._magic(dtype, len(size)), *size): + fh.write(self._encode(meta)) + + # If ever an MNIST variant is added that uses floating point data, this should be adapted. + data = torch.randint(0, torch.iinfo(dtype).max + 1, size, dtype=dtype) + fh.write(data.numpy().tobytes()) + + def _magic(self, dtype, dims): + return self._MAGIC_DTYPES[dtype] * 256 + dims + + def _encode(self, v): + return torch.tensor(v, dtype=torch.int32).numpy().tobytes()[::-1] + + +class FashionMNISTTestCase(MNISTTestCase): + DATASET_CLASS = datasets.FashionMNIST + + +class KMNISTTestCase(MNISTTestCase): + DATASET_CLASS = datasets.KMNIST + + +class EMNISTTestCase(MNISTTestCase): + DATASET_CLASS = datasets.EMNIST + + DEFAULT_CONFIG = dict(split="byclass") + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + split=("byclass", "bymerge", "balanced", "letters", "digits", "mnist"), train=(True, False) + ) + + def _prefix(self, config): + return f"emnist-{config['split']}-{'train' if config['train'] else 'test'}" + + +class QMNISTTestCase(MNISTTestCase): + DATASET_CLASS = datasets.QMNIST + + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(what=("train", "test", "test10k", "nist")) + + _LABELS_SIZE = (8,) + _LABELS_DTYPE = torch.int32 + + def _num_images(self, config): + if config["what"] == "nist": + return 3 + elif config["what"] == "train": + return 2 + elif config["what"] == "test50k": + # The split 'test50k' is defined as the last 50k images beginning at index 10000. Thus, we need to create + # more than 10000 images for the dataset to not be empty. Since this takes significantly longer than the + # creation of all other splits, this is excluded from the 'ADDITIONAL_CONFIGS' and is tested only once in + # 'test_num_examples_test50k'. + return 10001 + else: + return 1 + + def _labels_file(self, config): + return f"{self._prefix(config)}-labels-idx2-int" + + def _prefix(self, config): + if config["what"] == "nist": + return "xnist" + + if config["what"] is None: + what = "train" if config["train"] else "test" + elif config["what"].startswith("test"): + what = "test" + else: + what = config["what"] + + return f"qmnist-{what}" + + def test_num_examples_test50k(self): + with self.create_dataset(what="test50k") as (dataset, info): + # Since the split 'test50k' selects all images beginning from the index 10000, we subtract the number of + # created examples by this. + self.assertEqual(len(dataset), info["num_examples"] - 10000) + + +class DatasetFolderTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.DatasetFolder + + # The dataset has no fixed return type since it is defined by the loader parameter. For testing, we use a loader + # that simply returns the path as type 'str' instead of loading anything. See the 'dataset_args()' method. + FEATURE_TYPES = (str, int) + + _IMAGE_EXTENSIONS = ("jpg", "png") + _VIDEO_EXTENSIONS = ("avi", "mp4") + _EXTENSIONS = (*_IMAGE_EXTENSIONS, *_VIDEO_EXTENSIONS) + + # DatasetFolder has two mutually exclusive parameters: 'extensions' and 'is_valid_file'. One of both is required. + # We only iterate over different 'extensions' here and handle the tests for 'is_valid_file' in the + # 'test_is_valid_file()' method. + DEFAULT_CONFIG = dict(extensions=_EXTENSIONS) + ADDITIONAL_CONFIGS = ( + *datasets_utils.combinations_grid(extensions=[(ext,) for ext in _IMAGE_EXTENSIONS]), + dict(extensions=_IMAGE_EXTENSIONS), + *datasets_utils.combinations_grid(extensions=[(ext,) for ext in _VIDEO_EXTENSIONS]), + dict(extensions=_VIDEO_EXTENSIONS), + ) + + def dataset_args(self, tmpdir, config): + return tmpdir, lambda x: x + + def inject_fake_data(self, tmpdir, config): + extensions = config["extensions"] or self._is_valid_file_to_extensions(config["is_valid_file"]) + + num_examples_total = 0 + classes = [] + for ext, cls in zip(self._EXTENSIONS, string.ascii_letters): + if ext not in extensions: + continue + + create_example_folder = ( + datasets_utils.create_image_folder + if ext in self._IMAGE_EXTENSIONS + else datasets_utils.create_video_folder + ) + + num_examples = torch.randint(1, 3, size=()).item() + create_example_folder(tmpdir, cls, lambda idx: self._file_name_fn(cls, ext, idx), num_examples) + + num_examples_total += num_examples + classes.append(cls) + + return dict(num_examples=num_examples_total, classes=classes) + + def _file_name_fn(self, cls, ext, idx): + return f"{cls}_{idx}.{ext}" + + def _is_valid_file_to_extensions(self, is_valid_file): + return {ext for ext in self._EXTENSIONS if is_valid_file(f"foo.{ext}")} + + @datasets_utils.test_all_configs + def test_is_valid_file(self, config): + extensions = config.pop("extensions") + # We need to explicitly pass extensions=None here or otherwise it would be filled by the value from the + # DEFAULT_CONFIG. + with self.create_dataset( + config, extensions=None, is_valid_file=lambda file: pathlib.Path(file).suffix[1:] in extensions + ) as (dataset, info): + self.assertEqual(len(dataset), info["num_examples"]) + + @datasets_utils.test_all_configs + def test_classes(self, config): + with self.create_dataset(config) as (dataset, info): + self.assertSequenceEqual(dataset.classes, info["classes"]) + + +class ImageFolderTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.ImageFolder + + def inject_fake_data(self, tmpdir, config): + num_examples_total = 0 + classes = ("a", "b") + for cls in classes: + num_examples = torch.randint(1, 3, size=()).item() + num_examples_total += num_examples + + datasets_utils.create_image_folder(tmpdir, cls, lambda idx: f"{cls}_{idx}.png", num_examples) + + return dict(num_examples=num_examples_total, classes=classes) + + @datasets_utils.test_all_configs + def test_classes(self, config): + with self.create_dataset(config) as (dataset, info): + self.assertSequenceEqual(dataset.classes, info["classes"]) + + +class KittiTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.Kitti + FEATURE_TYPES = (PIL.Image.Image, (list, type(None))) # test split returns None as target + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True, False)) + + def inject_fake_data(self, tmpdir, config): + kitti_dir = os.path.join(tmpdir, "Kitti", "raw") + os.makedirs(kitti_dir) + + split_to_num_examples = { + True: 1, + False: 2, + } + + # We need to create all folders(training and testing). + for is_training in (True, False): + num_examples = split_to_num_examples[is_training] + + datasets_utils.create_image_folder( + root=kitti_dir, + name=os.path.join("training" if is_training else "testing", "image_2"), + file_name_fn=lambda image_idx: f"{image_idx:06d}.png", + num_examples=num_examples, + ) + if is_training: + for image_idx in range(num_examples): + target_file_dir = os.path.join(kitti_dir, "training", "label_2") + os.makedirs(target_file_dir) + target_file_name = os.path.join(target_file_dir, f"{image_idx:06d}.txt") + target_contents = "Pedestrian 0.00 0 -0.20 712.40 143.00 810.73 307.92 1.89 0.48 1.20 1.84 1.47 8.41 0.01\n" # noqa + with open(target_file_name, "w") as target_file: + target_file.write(target_contents) + + return split_to_num_examples[config["train"]] + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/test_datasets_download.py b/test/test_datasets_download.py new file mode 100644 index 00000000000..6ff3a33bcc9 --- /dev/null +++ b/test/test_datasets_download.py @@ -0,0 +1,438 @@ +import contextlib +import itertools +import time +import unittest.mock +from datetime import datetime +from distutils import dir_util +from os import path +from urllib.error import HTTPError, URLError +from urllib.parse import urlparse +from urllib.request import urlopen, Request +import tempfile +import warnings + +import pytest + +from torchvision import datasets +from torchvision.datasets.utils import ( + download_url, + check_integrity, + download_file_from_google_drive, + _get_redirect_url, + USER_AGENT, +) + +from common_utils import get_tmp_dir +from fakedata_generation import places365_root + + +def limit_requests_per_time(min_secs_between_requests=2.0): + last_requests = {} + + def outer_wrapper(fn): + def inner_wrapper(request, *args, **kwargs): + url = request.full_url if isinstance(request, Request) else request + + netloc = urlparse(url).netloc + last_request = last_requests.get(netloc) + if last_request is not None: + elapsed_secs = (datetime.now() - last_request).total_seconds() + delta = min_secs_between_requests - elapsed_secs + if delta > 0: + time.sleep(delta) + + response = fn(request, *args, **kwargs) + last_requests[netloc] = datetime.now() + + return response + + return inner_wrapper + + return outer_wrapper + + +urlopen = limit_requests_per_time()(urlopen) + + +def resolve_redirects(max_hops=3): + def outer_wrapper(fn): + def inner_wrapper(request, *args, **kwargs): + initial_url = request.full_url if isinstance(request, Request) else request + url = _get_redirect_url(initial_url, max_hops=max_hops) + + if url == initial_url: + return fn(request, *args, **kwargs) + + warnings.warn(f"The URL {initial_url} ultimately redirects to {url}.") + + if not isinstance(request, Request): + return fn(url, *args, **kwargs) + + request_attrs = { + attr: getattr(request, attr) for attr in ("data", "headers", "origin_req_host", "unverifiable") + } + # the 'method' attribute does only exist if the request was created with it + if hasattr(request, "method"): + request_attrs["method"] = request.method + + return fn(Request(url, **request_attrs), *args, **kwargs) + + return inner_wrapper + + return outer_wrapper + + +urlopen = resolve_redirects()(urlopen) + + +@contextlib.contextmanager +def log_download_attempts( + urls_and_md5s=None, + file="utils", + patch=True, + mock_auxiliaries=None, +): + def add_mock(stack, name, file, **kwargs): + try: + return stack.enter_context(unittest.mock.patch(f"torchvision.datasets.{file}.{name}", **kwargs)) + except AttributeError as error: + if file != "utils": + return add_mock(stack, name, "utils", **kwargs) + else: + raise pytest.UsageError from error + + if urls_and_md5s is None: + urls_and_md5s = set() + if mock_auxiliaries is None: + mock_auxiliaries = patch + + with contextlib.ExitStack() as stack: + url_mock = add_mock(stack, "download_url", file, wraps=None if patch else download_url) + google_drive_mock = add_mock( + stack, "download_file_from_google_drive", file, wraps=None if patch else download_file_from_google_drive + ) + + if mock_auxiliaries: + add_mock(stack, "extract_archive", file) + + try: + yield urls_and_md5s + finally: + for args, kwargs in url_mock.call_args_list: + url = args[0] + md5 = args[-1] if len(args) == 4 else kwargs.get("md5") + urls_and_md5s.add((url, md5)) + + for args, kwargs in google_drive_mock.call_args_list: + id = args[0] + url = f"https://drive.google.com/file/d/{id}" + md5 = args[3] if len(args) == 4 else kwargs.get("md5") + urls_and_md5s.add((url, md5)) + + +def retry(fn, times=1, wait=5.0): + msgs = [] + for _ in range(times + 1): + try: + return fn() + except AssertionError as error: + msgs.append(str(error)) + time.sleep(wait) + else: + raise AssertionError( + "\n".join( + ( + f"Assertion failed {times + 1} times with {wait:.1f} seconds intermediate wait time.\n", + *(f"{idx}: {error}" for idx, error in enumerate(msgs, 1)), + ) + ) + ) + + +@contextlib.contextmanager +def assert_server_response_ok(): + try: + yield + except URLError as error: + raise AssertionError("The request timed out.") from error + except HTTPError as error: + raise AssertionError(f"The server returned {error.code}: {error.reason}.") from error + except RecursionError as error: + raise AssertionError(str(error)) from error + + +def assert_url_is_accessible(url, timeout=5.0): + request = Request(url, headers={"User-Agent": USER_AGENT}, method="HEAD") + with assert_server_response_ok(): + urlopen(request, timeout=timeout) + + +def assert_file_downloads_correctly(url, md5, timeout=5.0): + with get_tmp_dir() as root: + file = path.join(root, path.basename(url)) + with assert_server_response_ok(): + with open(file, "wb") as fh: + request = Request(url, headers={"User-Agent": USER_AGENT}) + response = urlopen(request, timeout=timeout) + fh.write(response.read()) + + assert check_integrity(file, md5=md5), "The MD5 checksums mismatch" + + +class DownloadConfig: + def __init__(self, url, md5=None, id=None): + self.url = url + self.md5 = md5 + self.id = id or url + + def __repr__(self): + return self.id + + +def make_download_configs(urls_and_md5s, name=None): + return [ + DownloadConfig(url, md5=md5, id=f"{name}, {url}" if name is not None else None) for url, md5 in urls_and_md5s + ] + + +def collect_download_configs(dataset_loader, name=None, **kwargs): + urls_and_md5s = set() + try: + with log_download_attempts(urls_and_md5s=urls_and_md5s, **kwargs): + dataset = dataset_loader() + except Exception: + dataset = None + + if name is None and dataset is not None: + name = type(dataset).__name__ + + return make_download_configs(urls_and_md5s, name) + + +# This is a workaround since fixtures, such as the built-in tmp_dir, can only be used within a test but not within a +# parametrization. Thus, we use a single root directory for all datasets and remove it when all download tests are run. +ROOT = tempfile.mkdtemp() + + +@pytest.fixture(scope="module", autouse=True) +def root(): + yield ROOT + dir_util.remove_tree(ROOT) + + +def places365(): + with log_download_attempts(patch=False) as urls_and_md5s: + for split, small in itertools.product(("train-standard", "train-challenge", "val"), (False, True)): + with places365_root(split=split, small=small) as places365: + root, data = places365 + + datasets.Places365(root, split=split, small=small, download=True) + + return make_download_configs(urls_and_md5s, name="Places365") + + +def caltech101(): + return collect_download_configs(lambda: datasets.Caltech101(ROOT, download=True), name="Caltech101") + + +def caltech256(): + return collect_download_configs(lambda: datasets.Caltech256(ROOT, download=True), name="Caltech256") + + +def cifar10(): + return collect_download_configs(lambda: datasets.CIFAR10(ROOT, download=True), name="CIFAR10") + + +def cifar100(): + return collect_download_configs(lambda: datasets.CIFAR100(ROOT, download=True), name="CIFAR100") + + +def voc(): + return itertools.chain( + *[ + collect_download_configs( + lambda: datasets.VOCSegmentation(ROOT, year=year, download=True), + name=f"VOC, {year}", + file="voc", + ) + for year in ("2007", "2007-test", "2008", "2009", "2010", "2011", "2012") + ] + ) + + +def mnist(): + with unittest.mock.patch.object(datasets.MNIST, "mirrors", datasets.MNIST.mirrors[-1:]): + return collect_download_configs(lambda: datasets.MNIST(ROOT, download=True), name="MNIST") + + +def fashion_mnist(): + return collect_download_configs(lambda: datasets.FashionMNIST(ROOT, download=True), name="FashionMNIST") + + +def kmnist(): + return collect_download_configs(lambda: datasets.KMNIST(ROOT, download=True), name="KMNIST") + + +def emnist(): + # the 'split' argument can be any valid one, since everything is downloaded anyway + return collect_download_configs(lambda: datasets.EMNIST(ROOT, split="byclass", download=True), name="EMNIST") + + +def qmnist(): + return itertools.chain( + *[ + collect_download_configs( + lambda: datasets.QMNIST(ROOT, what=what, download=True), + name=f"QMNIST, {what}", + file="mnist", + ) + for what in ("train", "test", "nist") + ] + ) + + +def omniglot(): + return itertools.chain( + *[ + collect_download_configs( + lambda: datasets.Omniglot(ROOT, background=background, download=True), + name=f"Omniglot, {'background' if background else 'evaluation'}", + ) + for background in (True, False) + ] + ) + + +def phototour(): + return itertools.chain( + *[ + collect_download_configs( + lambda: datasets.PhotoTour(ROOT, name=name, download=True), + name=f"PhotoTour, {name}", + file="phototour", + ) + # The names postfixed with '_harris' point to the domain 'matthewalunbrown.com'. For some reason all + # requests timeout from within CI. They are disabled until this is resolved. + for name in ("notredame", "yosemite", "liberty") # "notredame_harris", "yosemite_harris", "liberty_harris" + ] + ) + + +def sbdataset(): + return collect_download_configs( + lambda: datasets.SBDataset(ROOT, download=True), + name="SBDataset", + file="voc", + ) + + +def sbu(): + return collect_download_configs( + lambda: datasets.SBU(ROOT, download=True), + name="SBU", + file="sbu", + ) + + +def semeion(): + return collect_download_configs( + lambda: datasets.SEMEION(ROOT, download=True), + name="SEMEION", + file="semeion", + ) + + +def stl10(): + return collect_download_configs( + lambda: datasets.STL10(ROOT, download=True), + name="STL10", + ) + + +def svhn(): + return itertools.chain( + *[ + collect_download_configs( + lambda: datasets.SVHN(ROOT, split=split, download=True), + name=f"SVHN, {split}", + file="svhn", + ) + for split in ("train", "test", "extra") + ] + ) + + +def usps(): + return itertools.chain( + *[ + collect_download_configs( + lambda: datasets.USPS(ROOT, train=train, download=True), + name=f"USPS, {'train' if train else 'test'}", + file="usps", + ) + for train in (True, False) + ] + ) + + +def celeba(): + return collect_download_configs( + lambda: datasets.CelebA(ROOT, download=True), + name="CelebA", + file="celeba", + ) + + +def widerface(): + return collect_download_configs( + lambda: datasets.WIDERFace(ROOT, download=True), + name="WIDERFace", + file="widerface", + ) + + +def make_parametrize_kwargs(download_configs): + argvalues = [] + ids = [] + for config in download_configs: + argvalues.append((config.url, config.md5)) + ids.append(config.id) + + return dict(argnames=("url", "md5"), argvalues=argvalues, ids=ids) + + +@pytest.mark.parametrize( + **make_parametrize_kwargs( + itertools.chain( + places365(), + caltech101(), + caltech256(), + cifar10(), + cifar100(), + # The VOC download server is unstable. See https://github.com/pytorch/vision/issues/2953 for details. + # voc(), + mnist(), + fashion_mnist(), + kmnist(), + emnist(), + qmnist(), + omniglot(), + phototour(), + sbdataset(), + sbu(), + semeion(), + stl10(), + svhn(), + usps(), + celeba(), + widerface(), + ) + ) +) +def test_url_is_accessible(url, md5): + retry(lambda: assert_url_is_accessible(url)) + + +@pytest.mark.parametrize(**make_parametrize_kwargs(itertools.chain())) +def test_file_downloads_correctly(url, md5): + retry(lambda: assert_file_downloads_correctly(url, md5)) diff --git a/test/test_datasets_samplers.py b/test/test_datasets_samplers.py index 90f3f3806aa..87a4900489f 100644 --- a/test/test_datasets_samplers.py +++ b/test/test_datasets_samplers.py @@ -29,7 +29,7 @@ def get_list_of_videos(num_videos=5, sizes=None, fps=None): f = 5 else: f = fps[i] - data = torch.randint(0, 255, (size, 300, 400, 3), dtype=torch.uint8) + data = torch.randint(0, 256, (size, 300, 400, 3), dtype=torch.uint8) name = os.path.join(tmp_dir, "{}.mp4".format(i)) names.append(name) io.write_video(name, data, fps=f) diff --git a/test/test_datasets_utils.py b/test/test_datasets_utils.py index 14a53b75c54..949026d31cb 100644 --- a/test/test_datasets_utils.py +++ b/test/test_datasets_utils.py @@ -1,25 +1,21 @@ import os -import sys -import tempfile import torchvision.datasets.utils as utils import unittest +import unittest.mock import zipfile import tarfile import gzip import warnings -from torch._six import PY2 from torch._utils_internal import get_file_path_2 +from urllib.error import URLError +import itertools +import lzma -from common_utils import get_tmp_dir - -if sys.version_info < (3,): - from urllib2 import URLError -else: - from urllib.error import URLError +from common_utils import get_tmp_dir, call_args_to_kwargs_only TEST_FILE = get_file_path_2( - os.path.dirname(os.path.abspath(__file__)), 'assets', 'grace_hopper_517x606.jpg') + os.path.dirname(os.path.abspath(__file__)), 'assets', 'encode_jpeg', 'grace_hopper_517x606.jpg') class Tester(unittest.TestCase): @@ -41,94 +37,195 @@ def test_check_integrity(self): self.assertTrue(utils.check_integrity(existing_fpath)) self.assertFalse(utils.check_integrity(nonexisting_fpath)) - @unittest.skipIf(PY2, "https://github.com/pytorch/vision/issues/1268") - def test_download_url(self): + def test_get_google_drive_file_id(self): + url = "https://drive.google.com/file/d/1hbzc_P1FuxMkcabkgn9ZKinBwW683j45/view" + expected = "1hbzc_P1FuxMkcabkgn9ZKinBwW683j45" + + actual = utils._get_google_drive_file_id(url) + assert actual == expected + + def test_get_google_drive_file_id_invalid_url(self): + url = "http://www.vision.caltech.edu/visipedia-data/CUB-200-2011/CUB_200_2011.tgz" + + assert utils._get_google_drive_file_id(url) is None + + def test_detect_file_type(self): + for file, expected in [ + ("foo.tar.xz", (".tar.xz", ".tar", ".xz")), + ("foo.tar", (".tar", ".tar", None)), + ("foo.tar.gz", (".tar.gz", ".tar", ".gz")), + ("foo.tgz", (".tgz", ".tar", ".gz")), + ("foo.gz", (".gz", None, ".gz")), + ("foo.zip", (".zip", ".zip", None)), + ("foo.xz", (".xz", None, ".xz")), + ]: + with self.subTest(file=file): + self.assertSequenceEqual(utils._detect_file_type(file), expected) + + def test_detect_file_type_no_ext(self): + with self.assertRaises(RuntimeError): + utils._detect_file_type("foo") + + def test_detect_file_type_to_many_exts(self): + with self.assertRaises(RuntimeError): + utils._detect_file_type("foo.bar.tar.gz") + + def test_detect_file_type_unknown_archive_type(self): + with self.assertRaises(RuntimeError): + utils._detect_file_type("foo.bar.gz") + + def test_detect_file_type_unknown_compression(self): + with self.assertRaises(RuntimeError): + utils._detect_file_type("foo.tar.baz") + + def test_detect_file_type_unknown_partial_ext(self): + with self.assertRaises(RuntimeError): + utils._detect_file_type("foo.bar") + + def test_decompress_gzip(self): + def create_compressed(root, content="this is the content"): + file = os.path.join(root, "file") + compressed = f"{file}.gz" + + with gzip.open(compressed, "wb") as fh: + fh.write(content.encode()) + + return compressed, file, content + with get_tmp_dir() as temp_dir: - url = "http://github.com/pytorch/vision/archive/master.zip" - try: - utils.download_url(url, temp_dir) - self.assertFalse(len(os.listdir(temp_dir)) == 0) - except URLError: - msg = "could not download test file '{}'".format(url) - warnings.warn(msg, RuntimeWarning) - raise unittest.SkipTest(msg) - - @unittest.skipIf(PY2, "https://github.com/pytorch/vision/issues/1268") - def test_download_url_retry_http(self): + compressed, file, content = create_compressed(temp_dir) + + utils._decompress(compressed) + + self.assertTrue(os.path.exists(file)) + + with open(file, "r") as fh: + self.assertEqual(fh.read(), content) + + def test_decompress_lzma(self): + def create_compressed(root, content="this is the content"): + file = os.path.join(root, "file") + compressed = f"{file}.xz" + + with lzma.open(compressed, "wb") as fh: + fh.write(content.encode()) + + return compressed, file, content + with get_tmp_dir() as temp_dir: - url = "https://github.com/pytorch/vision/archive/master.zip" - try: - utils.download_url(url, temp_dir) - self.assertFalse(len(os.listdir(temp_dir)) == 0) - except URLError: - msg = "could not download test file '{}'".format(url) - warnings.warn(msg, RuntimeWarning) - raise unittest.SkipTest(msg) - - @unittest.skipIf(sys.version_info < (3,), "Python2 doesn't raise error") - def test_download_url_dont_exist(self): + compressed, file, content = create_compressed(temp_dir) + + utils.extract_archive(compressed, temp_dir) + + self.assertTrue(os.path.exists(file)) + + with open(file, "r") as fh: + self.assertEqual(fh.read(), content) + + def test_decompress_no_compression(self): + with self.assertRaises(RuntimeError): + utils._decompress("foo.tar") + + def test_decompress_remove_finished(self): + def create_compressed(root, content="this is the content"): + file = os.path.join(root, "file") + compressed = f"{file}.gz" + + with gzip.open(compressed, "wb") as fh: + fh.write(content.encode()) + + return compressed, file, content + with get_tmp_dir() as temp_dir: - url = "http://github.com/pytorch/vision/archive/this_doesnt_exist.zip" - with self.assertRaises(URLError): - utils.download_url(url, temp_dir) + compressed, file, content = create_compressed(temp_dir) + + utils.extract_archive(compressed, temp_dir, remove_finished=True) + + self.assertFalse(os.path.exists(compressed)) + + def test_extract_archive_defer_to_decompress(self): + filename = "foo" + for ext, remove_finished in itertools.product((".gz", ".xz"), (True, False)): + with self.subTest(ext=ext, remove_finished=remove_finished): + with unittest.mock.patch("torchvision.datasets.utils._decompress") as mock: + file = f"{filename}{ext}" + utils.extract_archive(file, remove_finished=remove_finished) + + mock.assert_called_once() + self.assertEqual( + call_args_to_kwargs_only(mock.call_args, utils._decompress), + dict(from_path=file, to_path=filename, remove_finished=remove_finished), + ) - @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows') def test_extract_zip(self): + def create_archive(root, content="this is the content"): + file = os.path.join(root, "dst.txt") + archive = os.path.join(root, "archive.zip") + + with zipfile.ZipFile(archive, "w") as zf: + zf.writestr(os.path.basename(file), content) + + return archive, file, content + with get_tmp_dir() as temp_dir: - with tempfile.NamedTemporaryFile(suffix='.zip') as f: - with zipfile.ZipFile(f, 'w') as zf: - zf.writestr('file.tst', 'this is the content') - utils.extract_archive(f.name, temp_dir) - self.assertTrue(os.path.exists(os.path.join(temp_dir, 'file.tst'))) - with open(os.path.join(temp_dir, 'file.tst'), 'r') as nf: - data = nf.read() - self.assertEqual(data, 'this is the content') - - @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows') + archive, file, content = create_archive(temp_dir) + + utils.extract_archive(archive, temp_dir) + + self.assertTrue(os.path.exists(file)) + + with open(file, "r") as fh: + self.assertEqual(fh.read(), content) + def test_extract_tar(self): - for ext, mode in zip(['.tar', '.tar.gz'], ['w', 'w:gz']): + def create_archive(root, ext, mode, content="this is the content"): + src = os.path.join(root, "src.txt") + dst = os.path.join(root, "dst.txt") + archive = os.path.join(root, f"archive{ext}") + + with open(src, "w") as fh: + fh.write(content) + + with tarfile.open(archive, mode=mode) as fh: + fh.add(src, arcname=os.path.basename(dst)) + + return archive, dst, content + + for ext, mode in zip(['.tar', '.tar.gz', '.tgz'], ['w', 'w:gz', 'w:gz']): with get_tmp_dir() as temp_dir: - with tempfile.NamedTemporaryFile() as bf: - bf.write("this is the content".encode()) - bf.seek(0) - with tempfile.NamedTemporaryFile(suffix=ext) as f: - with tarfile.open(f.name, mode=mode) as zf: - zf.add(bf.name, arcname='file.tst') - utils.extract_archive(f.name, temp_dir) - self.assertTrue(os.path.exists(os.path.join(temp_dir, 'file.tst'))) - with open(os.path.join(temp_dir, 'file.tst'), 'r') as nf: - data = nf.read() - self.assertEqual(data, 'this is the content') - - @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows') - @unittest.skipIf(sys.version_info < (3,), "Extracting .tar.xz files is not supported under Python 2.x") + archive, file, content = create_archive(temp_dir, ext, mode) + + utils.extract_archive(archive, temp_dir) + + self.assertTrue(os.path.exists(file)) + + with open(file, "r") as fh: + self.assertEqual(fh.read(), content) + def test_extract_tar_xz(self): + def create_archive(root, ext, mode, content="this is the content"): + src = os.path.join(root, "src.txt") + dst = os.path.join(root, "dst.txt") + archive = os.path.join(root, f"archive{ext}") + + with open(src, "w") as fh: + fh.write(content) + + with tarfile.open(archive, mode=mode) as fh: + fh.add(src, arcname=os.path.basename(dst)) + + return archive, dst, content + for ext, mode in zip(['.tar.xz'], ['w:xz']): with get_tmp_dir() as temp_dir: - with tempfile.NamedTemporaryFile() as bf: - bf.write("this is the content".encode()) - bf.seek(0) - with tempfile.NamedTemporaryFile(suffix=ext) as f: - with tarfile.open(f.name, mode=mode) as zf: - zf.add(bf.name, arcname='file.tst') - utils.extract_archive(f.name, temp_dir) - self.assertTrue(os.path.exists(os.path.join(temp_dir, 'file.tst'))) - with open(os.path.join(temp_dir, 'file.tst'), 'r') as nf: - data = nf.read() - self.assertEqual(data, 'this is the content') - - @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows') - def test_extract_gzip(self): - with get_tmp_dir() as temp_dir: - with tempfile.NamedTemporaryFile(suffix='.gz') as f: - with gzip.GzipFile(f.name, 'wb') as zf: - zf.write('this is the content'.encode()) - utils.extract_archive(f.name, temp_dir) - f_name = os.path.join(temp_dir, os.path.splitext(os.path.basename(f.name))[0]) - self.assertTrue(os.path.exists(f_name)) - with open(os.path.join(f_name), 'r') as nf: - data = nf.read() - self.assertEqual(data, 'this is the content') + archive, file, content = create_archive(temp_dir, ext, mode) + + utils.extract_archive(archive, temp_dir) + + self.assertTrue(os.path.exists(file)) + + with open(file, "r") as fh: + self.assertEqual(fh.read(), content) def test_verify_str_arg(self): self.assertEqual("a", utils.verify_str_arg("a", "arg", ("a",))) diff --git a/test/test_datasets_video_utils.py b/test/test_datasets_video_utils.py index 2488edc613d..694214544f7 100644 --- a/test/test_datasets_video_utils.py +++ b/test/test_datasets_video_utils.py @@ -1,5 +1,4 @@ import contextlib -import sys import os import torch import unittest @@ -23,7 +22,7 @@ def get_list_of_videos(num_videos=5, sizes=None, fps=None): f = 5 else: f = fps[i] - data = torch.randint(0, 255, (size, 300, 400, 3), dtype=torch.uint8) + data = torch.randint(0, 256, (size, 300, 400, 3), dtype=torch.uint8) name = os.path.join(tmp_dir, "{}.mp4".format(i)) names.append(name) io.write_video(name, data, fps=f) @@ -59,10 +58,9 @@ def test_unfold(self): self.assertTrue(r.equal(expected)) @unittest.skipIf(not io.video._av_available(), "this test requires av") - @unittest.skipIf(sys.platform == 'win32', 'temporarily disabled on Windows') def test_video_clips(self): with get_list_of_videos(num_videos=3) as video_list: - video_clips = VideoClips(video_list, 5, 5) + video_clips = VideoClips(video_list, 5, 5, num_workers=2) self.assertEqual(video_clips.num_clips(), 1 + 2 + 3) for i, (v_idx, c_idx) in enumerate([(0, 0), (1, 0), (1, 1), (2, 0), (2, 1), (2, 2)]): video_idx, clip_idx = video_clips.get_clip_location(i) @@ -84,17 +82,15 @@ def test_video_clips(self): self.assertEqual(clip_idx, c_idx) @unittest.skipIf(not io.video._av_available(), "this test requires av") - @unittest.skipIf(sys.platform == 'win32', 'temporarily disabled on Windows') def test_video_clips_custom_fps(self): with get_list_of_videos(num_videos=3, sizes=[12, 12, 12], fps=[3, 4, 6]) as video_list: num_frames = 4 for fps in [1, 3, 4, 10]: - video_clips = VideoClips(video_list, num_frames, num_frames, fps) + video_clips = VideoClips(video_list, num_frames, num_frames, fps, num_workers=2) for i in range(video_clips.num_clips()): video, audio, info, video_idx = video_clips.get_clip(i) self.assertEqual(video.shape[0], num_frames) self.assertEqual(info["video_fps"], fps) - self.assertEqual(info, {"video_fps": fps}) # TODO add tests checking that the content is right def test_compute_clips_for_video(self): @@ -123,6 +119,16 @@ def test_compute_clips_for_video(self): self.assertTrue(clips.equal(idxs)) self.assertTrue(idxs.flatten().equal(resampled_idxs)) + # case 3: frames aren't enough for a clip + num_frames = 32 + orig_fps = 30 + new_fps = 13 + with self.assertWarns(UserWarning): + clips, idxs = VideoClips.compute_clips_for_video(video_pts, num_frames, num_frames, + orig_fps, new_fps) + self.assertEqual(len(clips), 0) + self.assertEqual(len(idxs), 0) + if __name__ == '__main__': unittest.main() diff --git a/test/test_datasets_video_utils_opt.py b/test/test_datasets_video_utils_opt.py index f94af400838..8075c701ed9 100644 --- a/test/test_datasets_video_utils_opt.py +++ b/test/test_datasets_video_utils_opt.py @@ -2,8 +2,8 @@ from torchvision import set_video_backend import test_datasets_video_utils - -set_video_backend('video_reader') +# Disabling the video backend switching temporarily +# set_video_backend('video_reader') if __name__ == '__main__': diff --git a/test/test_functional_tensor.py b/test/test_functional_tensor.py index e318420102b..b237720d7d7 100644 --- a/test/test_functional_tensor.py +++ b/test/test_functional_tensor.py @@ -1,80 +1,993 @@ -from __future__ import division +import os +import unittest +import colorsys +import math + +import numpy as np + import torch -import torchvision.transforms as transforms import torchvision.transforms.functional_tensor as F_t +import torchvision.transforms.functional_pil as F_pil import torchvision.transforms.functional as F -import numpy as np -import unittest -import random +from torchvision.transforms import InterpolationMode + +from common_utils import TransformsTester + +from typing import Dict, List, Sequence, Tuple + + +NEAREST, BILINEAR, BICUBIC = InterpolationMode.NEAREST, InterpolationMode.BILINEAR, InterpolationMode.BICUBIC + + +class Tester(TransformsTester): + def setUp(self): + self.device = "cpu" -class Tester(unittest.TestCase): + def _test_fn_on_batch(self, batch_tensors, fn, scripted_fn_atol=1e-8, **fn_kwargs): + transformed_batch = fn(batch_tensors, **fn_kwargs) + for i in range(len(batch_tensors)): + img_tensor = batch_tensors[i, ...] + transformed_img = fn(img_tensor, **fn_kwargs) + self.assertTrue(transformed_img.equal(transformed_batch[i, ...])) + + scripted_fn = torch.jit.script(fn) + # scriptable function test + s_transformed_batch = scripted_fn(batch_tensors, **fn_kwargs) + self.assertTrue(transformed_batch.allclose(s_transformed_batch, atol=scripted_fn_atol)) + + def test_assert_image_tensor(self): + shape = (100,) + tensor = torch.rand(*shape, dtype=torch.float, device=self.device) + + list_of_methods = [(F_t._get_image_size, (tensor, )), (F_t.vflip, (tensor, )), + (F_t.hflip, (tensor, )), (F_t.crop, (tensor, 1, 2, 4, 5)), + (F_t.adjust_brightness, (tensor, 0.)), (F_t.adjust_contrast, (tensor, 1.)), + (F_t.adjust_hue, (tensor, -0.5)), (F_t.adjust_saturation, (tensor, 2.)), + (F_t.center_crop, (tensor, [10, 11])), (F_t.five_crop, (tensor, [10, 11])), + (F_t.ten_crop, (tensor, [10, 11])), (F_t.pad, (tensor, [2, ], 2, "constant")), + (F_t.resize, (tensor, [10, 11])), (F_t.perspective, (tensor, [0.2, ])), + (F_t.gaussian_blur, (tensor, (2, 2), (0.7, 0.5))), + (F_t.invert, (tensor, )), (F_t.posterize, (tensor, 0)), + (F_t.solarize, (tensor, 0.3)), (F_t.adjust_sharpness, (tensor, 0.3)), + (F_t.autocontrast, (tensor, )), (F_t.equalize, (tensor, ))] + + for func, args in list_of_methods: + with self.assertRaises(Exception) as context: + func(*args) + + self.assertTrue('Tensor is not a torch image.' in str(context.exception)) def test_vflip(self): - img_tensor = torch.randn(3, 16, 16) - vflipped_img = F_t.vflip(img_tensor) - vflipped_img_again = F_t.vflip(vflipped_img) - self.assertEqual(vflipped_img.shape, img_tensor.shape) - self.assertTrue(torch.equal(img_tensor, vflipped_img_again)) + script_vflip = torch.jit.script(F.vflip) + + img_tensor, pil_img = self._create_data(16, 18, device=self.device) + vflipped_img = F.vflip(img_tensor) + vflipped_pil_img = F.vflip(pil_img) + self.compareTensorToPIL(vflipped_img, vflipped_pil_img) + + # scriptable function test + vflipped_img_script = script_vflip(img_tensor) + self.assertTrue(vflipped_img.equal(vflipped_img_script)) + + batch_tensors = self._create_data_batch(16, 18, num_samples=4, device=self.device) + self._test_fn_on_batch(batch_tensors, F.vflip) def test_hflip(self): - img_tensor = torch.randn(3, 16, 16) - hflipped_img = F_t.hflip(img_tensor) - hflipped_img_again = F_t.hflip(hflipped_img) - self.assertEqual(hflipped_img.shape, img_tensor.shape) - self.assertTrue(torch.equal(img_tensor, hflipped_img_again)) + script_hflip = torch.jit.script(F.hflip) + + img_tensor, pil_img = self._create_data(16, 18, device=self.device) + hflipped_img = F.hflip(img_tensor) + hflipped_pil_img = F.hflip(pil_img) + self.compareTensorToPIL(hflipped_img, hflipped_pil_img) + + # scriptable function test + hflipped_img_script = script_hflip(img_tensor) + self.assertTrue(hflipped_img.equal(hflipped_img_script)) + + batch_tensors = self._create_data_batch(16, 18, num_samples=4, device=self.device) + self._test_fn_on_batch(batch_tensors, F.hflip) def test_crop(self): - img_tensor = torch.randint(0, 255, (3, 16, 16), dtype=torch.uint8) - top = random.randint(0, 15) - left = random.randint(0, 15) - height = random.randint(1, 16 - top) - width = random.randint(1, 16 - left) - img_cropped = F_t.crop(img_tensor, top, left, height, width) - img_PIL = transforms.ToPILImage()(img_tensor) - img_PIL_cropped = F.crop(img_PIL, top, left, height, width) - img_cropped_GT = transforms.ToTensor()(img_PIL_cropped) - - self.assertTrue(torch.equal(img_cropped, (img_cropped_GT * 255).to(torch.uint8)), - "functional_tensor crop not working") - - def test_adjustments(self): - fns = ((F.adjust_brightness, F_t.adjust_brightness), - (F.adjust_contrast, F_t.adjust_contrast), - (F.adjust_saturation, F_t.adjust_saturation)) - - for _ in range(20): - channels = 3 - dims = torch.randint(1, 50, (2,)) - shape = (channels, dims[0], dims[1]) - - if torch.randint(0, 2, (1,)) == 0: - img = torch.rand(*shape, dtype=torch.float) - else: - img = torch.randint(0, 256, shape, dtype=torch.uint8) - - factor = 3 * torch.rand(1) - for f, ft in fns: - - ft_img = ft(img, factor) - if not img.dtype.is_floating_point: - ft_img = ft_img.to(torch.float) / 255 - - img_pil = transforms.ToPILImage()(img) - f_img_pil = f(img_pil, factor) - f_img = transforms.ToTensor()(f_img_pil) - - # F uses uint8 and F_t uses float, so there is a small - # difference in values caused by (at most 5) truncations. - max_diff = (ft_img - f_img).abs().max() - self.assertLess(max_diff, 5 / 255 + 1e-5) + script_crop = torch.jit.script(F.crop) + + img_tensor, pil_img = self._create_data(16, 18, device=self.device) + + test_configs = [ + (1, 2, 4, 5), # crop inside top-left corner + (2, 12, 3, 4), # crop inside top-right corner + (8, 3, 5, 6), # crop inside bottom-left corner + (8, 11, 4, 3), # crop inside bottom-right corner + ] + + for top, left, height, width in test_configs: + pil_img_cropped = F.crop(pil_img, top, left, height, width) + + img_tensor_cropped = F.crop(img_tensor, top, left, height, width) + self.compareTensorToPIL(img_tensor_cropped, pil_img_cropped) + + img_tensor_cropped = script_crop(img_tensor, top, left, height, width) + self.compareTensorToPIL(img_tensor_cropped, pil_img_cropped) + + batch_tensors = self._create_data_batch(16, 18, num_samples=4, device=self.device) + self._test_fn_on_batch(batch_tensors, F.crop, top=top, left=left, height=height, width=width) + + def test_hsv2rgb(self): + scripted_fn = torch.jit.script(F_t._hsv2rgb) + shape = (3, 100, 150) + for _ in range(10): + hsv_img = torch.rand(*shape, dtype=torch.float, device=self.device) + rgb_img = F_t._hsv2rgb(hsv_img) + ft_img = rgb_img.permute(1, 2, 0).flatten(0, 1) + + h, s, v, = hsv_img.unbind(0) + h = h.flatten().cpu().numpy() + s = s.flatten().cpu().numpy() + v = v.flatten().cpu().numpy() + + rgb = [] + for h1, s1, v1 in zip(h, s, v): + rgb.append(colorsys.hsv_to_rgb(h1, s1, v1)) + colorsys_img = torch.tensor(rgb, dtype=torch.float32, device=self.device) + max_diff = (ft_img - colorsys_img).abs().max() + self.assertLess(max_diff, 1e-5) + + s_rgb_img = scripted_fn(hsv_img) + self.assertTrue(rgb_img.allclose(s_rgb_img)) + + batch_tensors = self._create_data_batch(120, 100, num_samples=4, device=self.device).float() + self._test_fn_on_batch(batch_tensors, F_t._hsv2rgb) + + def test_rgb2hsv(self): + scripted_fn = torch.jit.script(F_t._rgb2hsv) + shape = (3, 150, 100) + for _ in range(10): + rgb_img = torch.rand(*shape, dtype=torch.float, device=self.device) + hsv_img = F_t._rgb2hsv(rgb_img) + ft_hsv_img = hsv_img.permute(1, 2, 0).flatten(0, 1) + + r, g, b, = rgb_img.unbind(dim=-3) + r = r.flatten().cpu().numpy() + g = g.flatten().cpu().numpy() + b = b.flatten().cpu().numpy() + + hsv = [] + for r1, g1, b1 in zip(r, g, b): + hsv.append(colorsys.rgb_to_hsv(r1, g1, b1)) + + colorsys_img = torch.tensor(hsv, dtype=torch.float32, device=self.device) + + ft_hsv_img_h, ft_hsv_img_sv = torch.split(ft_hsv_img, [1, 2], dim=1) + colorsys_img_h, colorsys_img_sv = torch.split(colorsys_img, [1, 2], dim=1) + + max_diff_h = ((colorsys_img_h * 2 * math.pi).sin() - (ft_hsv_img_h * 2 * math.pi).sin()).abs().max() + max_diff_sv = (colorsys_img_sv - ft_hsv_img_sv).abs().max() + max_diff = max(max_diff_h, max_diff_sv) + self.assertLess(max_diff, 1e-5) + + s_hsv_img = scripted_fn(rgb_img) + self.assertTrue(hsv_img.allclose(s_hsv_img, atol=1e-7)) + + batch_tensors = self._create_data_batch(120, 100, num_samples=4, device=self.device).float() + self._test_fn_on_batch(batch_tensors, F_t._rgb2hsv) def test_rgb_to_grayscale(self): - img_tensor = torch.randint(0, 255, (3, 16, 16), dtype=torch.uint8) - grayscale_tensor = F_t.rgb_to_grayscale(img_tensor).to(int) - grayscale_pil_img = torch.tensor(np.array(F.to_grayscale(F.to_pil_image(img_tensor)))).to(int) - max_diff = (grayscale_tensor - grayscale_pil_img).abs().max() - self.assertLess(max_diff, 1.0001) + script_rgb_to_grayscale = torch.jit.script(F.rgb_to_grayscale) + + img_tensor, pil_img = self._create_data(32, 34, device=self.device) + + for num_output_channels in (3, 1): + gray_pil_image = F.rgb_to_grayscale(pil_img, num_output_channels=num_output_channels) + gray_tensor = F.rgb_to_grayscale(img_tensor, num_output_channels=num_output_channels) + + self.approxEqualTensorToPIL(gray_tensor.float(), gray_pil_image, tol=1.0 + 1e-10, agg_method="max") + + s_gray_tensor = script_rgb_to_grayscale(img_tensor, num_output_channels=num_output_channels) + self.assertTrue(s_gray_tensor.equal(gray_tensor)) + + batch_tensors = self._create_data_batch(16, 18, num_samples=4, device=self.device) + self._test_fn_on_batch(batch_tensors, F.rgb_to_grayscale, num_output_channels=num_output_channels) + + def test_center_crop(self): + script_center_crop = torch.jit.script(F.center_crop) + + img_tensor, pil_img = self._create_data(32, 34, device=self.device) + + cropped_pil_image = F.center_crop(pil_img, [10, 11]) + + cropped_tensor = F.center_crop(img_tensor, [10, 11]) + self.compareTensorToPIL(cropped_tensor, cropped_pil_image) + + cropped_tensor = script_center_crop(img_tensor, [10, 11]) + self.compareTensorToPIL(cropped_tensor, cropped_pil_image) + + batch_tensors = self._create_data_batch(16, 18, num_samples=4, device=self.device) + self._test_fn_on_batch(batch_tensors, F.center_crop, output_size=[10, 11]) + + def test_five_crop(self): + script_five_crop = torch.jit.script(F.five_crop) + + img_tensor, pil_img = self._create_data(32, 34, device=self.device) + + cropped_pil_images = F.five_crop(pil_img, [10, 11]) + + cropped_tensors = F.five_crop(img_tensor, [10, 11]) + for i in range(5): + self.compareTensorToPIL(cropped_tensors[i], cropped_pil_images[i]) + + cropped_tensors = script_five_crop(img_tensor, [10, 11]) + for i in range(5): + self.compareTensorToPIL(cropped_tensors[i], cropped_pil_images[i]) + + batch_tensors = self._create_data_batch(16, 18, num_samples=4, device=self.device) + tuple_transformed_batches = F.five_crop(batch_tensors, [10, 11]) + for i in range(len(batch_tensors)): + img_tensor = batch_tensors[i, ...] + tuple_transformed_imgs = F.five_crop(img_tensor, [10, 11]) + self.assertEqual(len(tuple_transformed_imgs), len(tuple_transformed_batches)) + + for j in range(len(tuple_transformed_imgs)): + true_transformed_img = tuple_transformed_imgs[j] + transformed_img = tuple_transformed_batches[j][i, ...] + self.assertTrue(true_transformed_img.equal(transformed_img)) + + # scriptable function test + s_tuple_transformed_batches = script_five_crop(batch_tensors, [10, 11]) + for transformed_batch, s_transformed_batch in zip(tuple_transformed_batches, s_tuple_transformed_batches): + self.assertTrue(transformed_batch.equal(s_transformed_batch)) + + def test_ten_crop(self): + script_ten_crop = torch.jit.script(F.ten_crop) + + img_tensor, pil_img = self._create_data(32, 34, device=self.device) + + cropped_pil_images = F.ten_crop(pil_img, [10, 11]) + + cropped_tensors = F.ten_crop(img_tensor, [10, 11]) + for i in range(10): + self.compareTensorToPIL(cropped_tensors[i], cropped_pil_images[i]) + + cropped_tensors = script_ten_crop(img_tensor, [10, 11]) + for i in range(10): + self.compareTensorToPIL(cropped_tensors[i], cropped_pil_images[i]) + + batch_tensors = self._create_data_batch(16, 18, num_samples=4, device=self.device) + tuple_transformed_batches = F.ten_crop(batch_tensors, [10, 11]) + for i in range(len(batch_tensors)): + img_tensor = batch_tensors[i, ...] + tuple_transformed_imgs = F.ten_crop(img_tensor, [10, 11]) + self.assertEqual(len(tuple_transformed_imgs), len(tuple_transformed_batches)) + + for j in range(len(tuple_transformed_imgs)): + true_transformed_img = tuple_transformed_imgs[j] + transformed_img = tuple_transformed_batches[j][i, ...] + self.assertTrue(true_transformed_img.equal(transformed_img)) + + # scriptable function test + s_tuple_transformed_batches = script_ten_crop(batch_tensors, [10, 11]) + for transformed_batch, s_transformed_batch in zip(tuple_transformed_batches, s_tuple_transformed_batches): + self.assertTrue(transformed_batch.equal(s_transformed_batch)) + + def test_pad(self): + script_fn = torch.jit.script(F.pad) + tensor, pil_img = self._create_data(7, 8, device=self.device) + batch_tensors = self._create_data_batch(16, 18, num_samples=4, device=self.device) + + for dt in [None, torch.float32, torch.float64, torch.float16]: + + if dt == torch.float16 and torch.device(self.device).type == "cpu": + # skip float16 on CPU case + continue + + if dt is not None: + # This is a trivial cast to float of uint8 data to test all cases + tensor = tensor.to(dt) + batch_tensors = batch_tensors.to(dt) + + for pad in [2, [3, ], [0, 3], (3, 3), [4, 2, 4, 3]]: + configs = [ + {"padding_mode": "constant", "fill": 0}, + {"padding_mode": "constant", "fill": 10}, + {"padding_mode": "constant", "fill": 20}, + {"padding_mode": "edge"}, + {"padding_mode": "reflect"}, + {"padding_mode": "symmetric"}, + ] + for kwargs in configs: + pad_tensor = F_t.pad(tensor, pad, **kwargs) + pad_pil_img = F_pil.pad(pil_img, pad, **kwargs) + + pad_tensor_8b = pad_tensor + # we need to cast to uint8 to compare with PIL image + if pad_tensor_8b.dtype != torch.uint8: + pad_tensor_8b = pad_tensor_8b.to(torch.uint8) + + self.compareTensorToPIL(pad_tensor_8b, pad_pil_img, msg="{}, {}".format(pad, kwargs)) + + if isinstance(pad, int): + script_pad = [pad, ] + else: + script_pad = pad + pad_tensor_script = script_fn(tensor, script_pad, **kwargs) + self.assertTrue(pad_tensor.equal(pad_tensor_script), msg="{}, {}".format(pad, kwargs)) + + self._test_fn_on_batch(batch_tensors, F.pad, padding=script_pad, **kwargs) + + def _test_adjust_fn(self, fn, fn_pil, fn_t, configs, tol=2.0 + 1e-10, agg_method="max", + dts=(None, torch.float32, torch.float64)): + script_fn = torch.jit.script(fn) + torch.manual_seed(15) + tensor, pil_img = self._create_data(26, 34, device=self.device) + batch_tensors = self._create_data_batch(16, 18, num_samples=4, device=self.device) + + for dt in dts: + + if dt is not None: + tensor = F.convert_image_dtype(tensor, dt) + batch_tensors = F.convert_image_dtype(batch_tensors, dt) + + for config in configs: + adjusted_tensor = fn_t(tensor, **config) + adjusted_pil = fn_pil(pil_img, **config) + scripted_result = script_fn(tensor, **config) + msg = "{}, {}".format(dt, config) + self.assertEqual(adjusted_tensor.dtype, scripted_result.dtype, msg=msg) + self.assertEqual(adjusted_tensor.size()[1:], adjusted_pil.size[::-1], msg=msg) + + rbg_tensor = adjusted_tensor + + if adjusted_tensor.dtype != torch.uint8: + rbg_tensor = F.convert_image_dtype(adjusted_tensor, torch.uint8) + + # Check that max difference does not exceed 2 in [0, 255] range + # Exact matching is not possible due to incompatibility convert_image_dtype and PIL results + self.approxEqualTensorToPIL(rbg_tensor.float(), adjusted_pil, tol=tol, msg=msg, agg_method=agg_method) + + atol = 1e-6 + if adjusted_tensor.dtype == torch.uint8 and "cuda" in torch.device(self.device).type: + atol = 1.0 + self.assertTrue(adjusted_tensor.allclose(scripted_result, atol=atol), msg=msg) + + self._test_fn_on_batch(batch_tensors, fn, scripted_fn_atol=atol, **config) + + def test_adjust_brightness(self): + self._test_adjust_fn( + F.adjust_brightness, + F_pil.adjust_brightness, + F_t.adjust_brightness, + [{"brightness_factor": f} for f in [0.1, 0.5, 1.0, 1.34, 2.5]] + ) + + def test_adjust_contrast(self): + self._test_adjust_fn( + F.adjust_contrast, + F_pil.adjust_contrast, + F_t.adjust_contrast, + [{"contrast_factor": f} for f in [0.2, 0.5, 1.0, 1.5, 2.0]] + ) + + def test_adjust_saturation(self): + self._test_adjust_fn( + F.adjust_saturation, + F_pil.adjust_saturation, + F_t.adjust_saturation, + [{"saturation_factor": f} for f in [0.5, 0.75, 1.0, 1.5, 2.0]] + ) + + def test_adjust_hue(self): + self._test_adjust_fn( + F.adjust_hue, + F_pil.adjust_hue, + F_t.adjust_hue, + [{"hue_factor": f} for f in [-0.45, -0.25, 0.0, 0.25, 0.45]], + tol=16.1, + agg_method="max" + ) + + def test_adjust_gamma(self): + self._test_adjust_fn( + F.adjust_gamma, + F_pil.adjust_gamma, + F_t.adjust_gamma, + [{"gamma": g1, "gain": g2} for g1, g2 in zip([0.8, 1.0, 1.2], [0.7, 1.0, 1.3])] + ) + + def test_resize(self): + script_fn = torch.jit.script(F.resize) + tensor, pil_img = self._create_data(26, 36, device=self.device) + batch_tensors = self._create_data_batch(16, 18, num_samples=4, device=self.device) + + for dt in [None, torch.float32, torch.float64, torch.float16]: + + if dt == torch.float16 and torch.device(self.device).type == "cpu": + # skip float16 on CPU case + continue + + if dt is not None: + # This is a trivial cast to float of uint8 data to test all cases + tensor = tensor.to(dt) + batch_tensors = batch_tensors.to(dt) + + for size in [32, 26, [32, ], [32, 32], (32, 32), [26, 35]]: + for max_size in (None, 33, 40, 1000): + if max_size is not None and isinstance(size, Sequence) and len(size) != 1: + continue # unsupported, see assertRaises below + for interpolation in [BILINEAR, BICUBIC, NEAREST]: + resized_tensor = F.resize(tensor, size=size, interpolation=interpolation, max_size=max_size) + resized_pil_img = F.resize(pil_img, size=size, interpolation=interpolation, max_size=max_size) + + self.assertEqual( + resized_tensor.size()[1:], resized_pil_img.size[::-1], + msg="{}, {}".format(size, interpolation) + ) + + if interpolation not in [NEAREST, ]: + # We can not check values if mode = NEAREST, as results are different + # E.g. resized_tensor = [[a, a, b, c, d, d, e, ...]] + # E.g. resized_pil_img = [[a, b, c, c, d, e, f, ...]] + resized_tensor_f = resized_tensor + # we need to cast to uint8 to compare with PIL image + if resized_tensor_f.dtype == torch.uint8: + resized_tensor_f = resized_tensor_f.to(torch.float) + + # Pay attention to high tolerance for MAE + self.approxEqualTensorToPIL( + resized_tensor_f, resized_pil_img, tol=8.0, msg="{}, {}".format(size, interpolation) + ) + + if isinstance(size, int): + script_size = [size, ] + else: + script_size = size + + resize_result = script_fn(tensor, size=script_size, interpolation=interpolation, + max_size=max_size) + self.assertTrue(resized_tensor.equal(resize_result), msg="{}, {}".format(size, interpolation)) + + self._test_fn_on_batch( + batch_tensors, F.resize, size=script_size, interpolation=interpolation, max_size=max_size + ) + + # assert changed type warning + with self.assertWarnsRegex(UserWarning, r"Argument interpolation should be of type InterpolationMode"): + res1 = F.resize(tensor, size=32, interpolation=2) + res2 = F.resize(tensor, size=32, interpolation=BILINEAR) + self.assertTrue(res1.equal(res2)) + + for img in (tensor, pil_img): + exp_msg = "max_size should only be passed if size specifies the length of the smaller edge" + with self.assertRaisesRegex(ValueError, exp_msg): + F.resize(img, size=(32, 34), max_size=35) + with self.assertRaisesRegex(ValueError, "max_size = 32 must be strictly greater"): + F.resize(img, size=32, max_size=32) + + def test_resized_crop(self): + # test values of F.resized_crop in several cases: + # 1) resize to the same size, crop to the same size => should be identity + tensor, _ = self._create_data(26, 36, device=self.device) + + for mode in [NEAREST, BILINEAR, BICUBIC]: + out_tensor = F.resized_crop(tensor, top=0, left=0, height=26, width=36, size=[26, 36], interpolation=mode) + self.assertTrue(tensor.equal(out_tensor), msg="{} vs {}".format(out_tensor[0, :5, :5], tensor[0, :5, :5])) + + # 2) resize by half and crop a TL corner + tensor, _ = self._create_data(26, 36, device=self.device) + out_tensor = F.resized_crop(tensor, top=0, left=0, height=20, width=30, size=[10, 15], interpolation=NEAREST) + expected_out_tensor = tensor[:, :20:2, :30:2] + self.assertTrue( + expected_out_tensor.equal(out_tensor), + msg="{} vs {}".format(expected_out_tensor[0, :10, :10], out_tensor[0, :10, :10]) + ) + + batch_tensors = self._create_data_batch(26, 36, num_samples=4, device=self.device) + self._test_fn_on_batch( + batch_tensors, F.resized_crop, top=1, left=2, height=20, width=30, size=[10, 15], interpolation=NEAREST + ) + + def _test_affine_identity_map(self, tensor, scripted_affine): + # 1) identity map + out_tensor = F.affine(tensor, angle=0, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], interpolation=NEAREST) + + self.assertTrue( + tensor.equal(out_tensor), msg="{} vs {}".format(out_tensor[0, :5, :5], tensor[0, :5, :5]) + ) + out_tensor = scripted_affine( + tensor, angle=0, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], interpolation=NEAREST + ) + self.assertTrue( + tensor.equal(out_tensor), msg="{} vs {}".format(out_tensor[0, :5, :5], tensor[0, :5, :5]) + ) + + def _test_affine_square_rotations(self, tensor, pil_img, scripted_affine): + # 2) Test rotation + test_configs = [ + (90, torch.rot90(tensor, k=1, dims=(-1, -2))), + (45, None), + (30, None), + (-30, None), + (-45, None), + (-90, torch.rot90(tensor, k=-1, dims=(-1, -2))), + (180, torch.rot90(tensor, k=2, dims=(-1, -2))), + ] + for a, true_tensor in test_configs: + out_pil_img = F.affine( + pil_img, angle=a, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], interpolation=NEAREST + ) + out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1))).to(self.device) + + for fn in [F.affine, scripted_affine]: + out_tensor = fn( + tensor, angle=a, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], interpolation=NEAREST + ) + if true_tensor is not None: + self.assertTrue( + true_tensor.equal(out_tensor), + msg="{}\n{} vs \n{}".format(a, out_tensor[0, :5, :5], true_tensor[0, :5, :5]) + ) + + if out_tensor.dtype != torch.uint8: + out_tensor = out_tensor.to(torch.uint8) + + num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0 + ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2] + # Tolerance : less than 6% of different pixels + self.assertLess( + ratio_diff_pixels, + 0.06, + msg="{}\n{} vs \n{}".format( + ratio_diff_pixels, out_tensor[0, :7, :7], out_pil_tensor[0, :7, :7] + ) + ) + + def _test_affine_rect_rotations(self, tensor, pil_img, scripted_affine): + test_configs = [ + 90, 45, 15, -30, -60, -120 + ] + for a in test_configs: + + out_pil_img = F.affine( + pil_img, angle=a, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], interpolation=NEAREST + ) + out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1))) + + for fn in [F.affine, scripted_affine]: + out_tensor = fn( + tensor, angle=a, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], interpolation=NEAREST + ).cpu() + + if out_tensor.dtype != torch.uint8: + out_tensor = out_tensor.to(torch.uint8) + + num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0 + ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2] + # Tolerance : less than 3% of different pixels + self.assertLess( + ratio_diff_pixels, + 0.03, + msg="{}: {}\n{} vs \n{}".format( + a, ratio_diff_pixels, out_tensor[0, :7, :7], out_pil_tensor[0, :7, :7] + ) + ) + + def _test_affine_translations(self, tensor, pil_img, scripted_affine): + # 3) Test translation + test_configs = [ + [10, 12], (-12, -13) + ] + for t in test_configs: + + out_pil_img = F.affine(pil_img, angle=0, translate=t, scale=1.0, shear=[0.0, 0.0], interpolation=NEAREST) + + for fn in [F.affine, scripted_affine]: + out_tensor = fn(tensor, angle=0, translate=t, scale=1.0, shear=[0.0, 0.0], interpolation=NEAREST) + + if out_tensor.dtype != torch.uint8: + out_tensor = out_tensor.to(torch.uint8) + + self.compareTensorToPIL(out_tensor, out_pil_img) + + def _test_affine_all_ops(self, tensor, pil_img, scripted_affine): + # 4) Test rotation + translation + scale + share + test_configs = [ + (45.5, [5, 6], 1.0, [0.0, 0.0], None), + (33, (5, -4), 1.0, [0.0, 0.0], [0, 0, 0]), + (45, [-5, 4], 1.2, [0.0, 0.0], (1, 2, 3)), + (33, (-4, -8), 2.0, [0.0, 0.0], [255, 255, 255]), + (85, (10, -10), 0.7, [0.0, 0.0], [1, ]), + (0, [0, 0], 1.0, [35.0, ], (2.0, )), + (-25, [0, 0], 1.2, [0.0, 15.0], None), + (-45, [-10, 0], 0.7, [2.0, 5.0], None), + (-45, [-10, -10], 1.2, [4.0, 5.0], None), + (-90, [0, 0], 1.0, [0.0, 0.0], None), + ] + for r in [NEAREST, ]: + for a, t, s, sh, f in test_configs: + f_pil = int(f[0]) if f is not None and len(f) == 1 else f + out_pil_img = F.affine(pil_img, angle=a, translate=t, scale=s, shear=sh, interpolation=r, fill=f_pil) + out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1))) + + for fn in [F.affine, scripted_affine]: + out_tensor = fn(tensor, angle=a, translate=t, scale=s, shear=sh, interpolation=r, fill=f).cpu() + + if out_tensor.dtype != torch.uint8: + out_tensor = out_tensor.to(torch.uint8) + + num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0 + ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2] + # Tolerance : less than 5% (cpu), 6% (cuda) of different pixels + tol = 0.06 if self.device == "cuda" else 0.05 + self.assertLess( + ratio_diff_pixels, + tol, + msg="{}: {}\n{} vs \n{}".format( + (r, a, t, s, sh, f), ratio_diff_pixels, out_tensor[0, :7, :7], out_pil_tensor[0, :7, :7] + ) + ) + + def test_affine(self): + # Tests on square and rectangular images + scripted_affine = torch.jit.script(F.affine) + + data = [self._create_data(26, 26, device=self.device), self._create_data(32, 26, device=self.device)] + for tensor, pil_img in data: + + for dt in [None, torch.float32, torch.float64, torch.float16]: + + if dt == torch.float16 and torch.device(self.device).type == "cpu": + # skip float16 on CPU case + continue + + if dt is not None: + tensor = tensor.to(dtype=dt) + + self._test_affine_identity_map(tensor, scripted_affine) + if pil_img.size[0] == pil_img.size[1]: + self._test_affine_square_rotations(tensor, pil_img, scripted_affine) + else: + self._test_affine_rect_rotations(tensor, pil_img, scripted_affine) + self._test_affine_translations(tensor, pil_img, scripted_affine) + self._test_affine_all_ops(tensor, pil_img, scripted_affine) + + batch_tensors = self._create_data_batch(26, 36, num_samples=4, device=self.device) + if dt is not None: + batch_tensors = batch_tensors.to(dtype=dt) + + self._test_fn_on_batch( + batch_tensors, F.affine, angle=-43, translate=[-3, 4], scale=1.2, shear=[4.0, 5.0] + ) + + tensor, pil_img = data[0] + # assert deprecation warning and non-BC + with self.assertWarnsRegex(UserWarning, r"Argument resample is deprecated and will be removed"): + res1 = F.affine(tensor, 45, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], resample=2) + res2 = F.affine(tensor, 45, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], interpolation=BILINEAR) + self.assertTrue(res1.equal(res2)) + + # assert changed type warning + with self.assertWarnsRegex(UserWarning, r"Argument interpolation should be of type InterpolationMode"): + res1 = F.affine(tensor, 45, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], interpolation=2) + res2 = F.affine(tensor, 45, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], interpolation=BILINEAR) + self.assertTrue(res1.equal(res2)) + + with self.assertWarnsRegex(UserWarning, r"Argument fillcolor is deprecated and will be removed"): + res1 = F.affine(pil_img, 45, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], fillcolor=10) + res2 = F.affine(pil_img, 45, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], fill=10) + self.assertEqual(res1, res2) + + def _test_rotate_all_options(self, tensor, pil_img, scripted_rotate, centers): + img_size = pil_img.size + dt = tensor.dtype + for r in [NEAREST, ]: + for a in range(-180, 180, 17): + for e in [True, False]: + for c in centers: + for f in [None, [0, 0, 0], (1, 2, 3), [255, 255, 255], [1, ], (2.0, )]: + f_pil = int(f[0]) if f is not None and len(f) == 1 else f + out_pil_img = F.rotate(pil_img, angle=a, interpolation=r, expand=e, center=c, fill=f_pil) + out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1))) + for fn in [F.rotate, scripted_rotate]: + out_tensor = fn(tensor, angle=a, interpolation=r, expand=e, center=c, fill=f).cpu() + + if out_tensor.dtype != torch.uint8: + out_tensor = out_tensor.to(torch.uint8) + + self.assertEqual( + out_tensor.shape, + out_pil_tensor.shape, + msg="{}: {} vs {}".format( + (img_size, r, dt, a, e, c), out_tensor.shape, out_pil_tensor.shape + )) + + num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0 + ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2] + # Tolerance : less than 3% of different pixels + self.assertLess( + ratio_diff_pixels, + 0.03, + msg="{}: {}\n{} vs \n{}".format( + (img_size, r, dt, a, e, c, f), + ratio_diff_pixels, + out_tensor[0, :7, :7], + out_pil_tensor[0, :7, :7] + ) + ) + + def test_rotate(self): + # Tests on square image + scripted_rotate = torch.jit.script(F.rotate) + + data = [self._create_data(26, 26, device=self.device), self._create_data(32, 26, device=self.device)] + for tensor, pil_img in data: + + img_size = pil_img.size + centers = [ + None, + (int(img_size[0] * 0.3), int(img_size[0] * 0.4)), + [int(img_size[0] * 0.5), int(img_size[0] * 0.6)] + ] + + for dt in [None, torch.float32, torch.float64, torch.float16]: + + if dt == torch.float16 and torch.device(self.device).type == "cpu": + # skip float16 on CPU case + continue + + if dt is not None: + tensor = tensor.to(dtype=dt) + + self._test_rotate_all_options(tensor, pil_img, scripted_rotate, centers) + + batch_tensors = self._create_data_batch(26, 36, num_samples=4, device=self.device) + if dt is not None: + batch_tensors = batch_tensors.to(dtype=dt) + + center = (20, 22) + self._test_fn_on_batch( + batch_tensors, F.rotate, angle=32, interpolation=NEAREST, expand=True, center=center + ) + tensor, pil_img = data[0] + # assert deprecation warning and non-BC + with self.assertWarnsRegex(UserWarning, r"Argument resample is deprecated and will be removed"): + res1 = F.rotate(tensor, 45, resample=2) + res2 = F.rotate(tensor, 45, interpolation=BILINEAR) + self.assertTrue(res1.equal(res2)) + + # assert changed type warning + with self.assertWarnsRegex(UserWarning, r"Argument interpolation should be of type InterpolationMode"): + res1 = F.rotate(tensor, 45, interpolation=2) + res2 = F.rotate(tensor, 45, interpolation=BILINEAR) + self.assertTrue(res1.equal(res2)) + + def _test_perspective(self, tensor, pil_img, scripted_transform, test_configs): + dt = tensor.dtype + for f in [None, [0, 0, 0], [1, 2, 3], [255, 255, 255], [1, ], (2.0, )]: + for r in [NEAREST, ]: + for spoints, epoints in test_configs: + f_pil = int(f[0]) if f is not None and len(f) == 1 else f + out_pil_img = F.perspective(pil_img, startpoints=spoints, endpoints=epoints, interpolation=r, + fill=f_pil) + out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1))) + + for fn in [F.perspective, scripted_transform]: + out_tensor = fn(tensor, startpoints=spoints, endpoints=epoints, interpolation=r, fill=f).cpu() + + if out_tensor.dtype != torch.uint8: + out_tensor = out_tensor.to(torch.uint8) + + num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0 + ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2] + # Tolerance : less than 5% of different pixels + self.assertLess( + ratio_diff_pixels, + 0.05, + msg="{}: {}\n{} vs \n{}".format( + (f, r, dt, spoints, epoints), + ratio_diff_pixels, + out_tensor[0, :7, :7], + out_pil_tensor[0, :7, :7] + ) + ) + + def test_perspective(self): + + from torchvision.transforms import RandomPerspective + + data = [self._create_data(26, 34, device=self.device), self._create_data(26, 26, device=self.device)] + scripted_transform = torch.jit.script(F.perspective) + + for tensor, pil_img in data: + + test_configs = [ + [[[0, 0], [33, 0], [33, 25], [0, 25]], [[3, 2], [32, 3], [30, 24], [2, 25]]], + [[[3, 2], [32, 3], [30, 24], [2, 25]], [[0, 0], [33, 0], [33, 25], [0, 25]]], + [[[3, 2], [32, 3], [30, 24], [2, 25]], [[5, 5], [30, 3], [33, 19], [4, 25]]], + ] + n = 10 + test_configs += [ + RandomPerspective.get_params(pil_img.size[0], pil_img.size[1], i / n) for i in range(n) + ] + + for dt in [None, torch.float32, torch.float64, torch.float16]: + + if dt == torch.float16 and torch.device(self.device).type == "cpu": + # skip float16 on CPU case + continue + + if dt is not None: + tensor = tensor.to(dtype=dt) + + self._test_perspective(tensor, pil_img, scripted_transform, test_configs) + + batch_tensors = self._create_data_batch(26, 36, num_samples=4, device=self.device) + if dt is not None: + batch_tensors = batch_tensors.to(dtype=dt) + + for spoints, epoints in test_configs: + self._test_fn_on_batch( + batch_tensors, F.perspective, startpoints=spoints, endpoints=epoints, interpolation=NEAREST + ) + + # assert changed type warning + spoints = [[0, 0], [33, 0], [33, 25], [0, 25]] + epoints = [[3, 2], [32, 3], [30, 24], [2, 25]] + with self.assertWarnsRegex(UserWarning, r"Argument interpolation should be of type InterpolationMode"): + res1 = F.perspective(tensor, startpoints=spoints, endpoints=epoints, interpolation=2) + res2 = F.perspective(tensor, startpoints=spoints, endpoints=epoints, interpolation=BILINEAR) + self.assertTrue(res1.equal(res2)) + + def test_gaussian_blur(self): + small_image_tensor = torch.from_numpy( + np.arange(3 * 10 * 12, dtype="uint8").reshape((10, 12, 3)) + ).permute(2, 0, 1).to(self.device) + + large_image_tensor = torch.from_numpy( + np.arange(26 * 28, dtype="uint8").reshape((1, 26, 28)) + ).to(self.device) + + scripted_transform = torch.jit.script(F.gaussian_blur) + + # true_cv2_results = { + # # np_img = np.arange(3 * 10 * 12, dtype="uint8").reshape((10, 12, 3)) + # # cv2.GaussianBlur(np_img, ksize=(3, 3), sigmaX=0.8) + # "3_3_0.8": ... + # # cv2.GaussianBlur(np_img, ksize=(3, 3), sigmaX=0.5) + # "3_3_0.5": ... + # # cv2.GaussianBlur(np_img, ksize=(3, 5), sigmaX=0.8) + # "3_5_0.8": ... + # # cv2.GaussianBlur(np_img, ksize=(3, 5), sigmaX=0.5) + # "3_5_0.5": ... + # # np_img2 = np.arange(26 * 28, dtype="uint8").reshape((26, 28)) + # # cv2.GaussianBlur(np_img2, ksize=(23, 23), sigmaX=1.7) + # "23_23_1.7": ... + # } + p = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'assets', 'gaussian_blur_opencv_results.pt') + true_cv2_results = torch.load(p) + + for tensor in [small_image_tensor, large_image_tensor]: + + for dt in [None, torch.float32, torch.float64, torch.float16]: + if dt == torch.float16 and torch.device(self.device).type == "cpu": + # skip float16 on CPU case + continue + + if dt is not None: + tensor = tensor.to(dtype=dt) + + for ksize in [(3, 3), [3, 5], (23, 23)]: + for sigma in [[0.5, 0.5], (0.5, 0.5), (0.8, 0.8), (1.7, 1.7)]: + + _ksize = (ksize, ksize) if isinstance(ksize, int) else ksize + _sigma = sigma[0] if sigma is not None else None + shape = tensor.shape + gt_key = "{}_{}_{}__{}_{}_{}".format( + shape[-2], shape[-1], shape[-3], + _ksize[0], _ksize[1], _sigma + ) + if gt_key not in true_cv2_results: + continue + + true_out = torch.tensor( + true_cv2_results[gt_key] + ).reshape(shape[-2], shape[-1], shape[-3]).permute(2, 0, 1).to(tensor) + + for fn in [F.gaussian_blur, scripted_transform]: + out = fn(tensor, kernel_size=ksize, sigma=sigma) + self.assertEqual(true_out.shape, out.shape, msg="{}, {}".format(ksize, sigma)) + self.assertLessEqual( + torch.max(true_out.float() - out.float()), + 1.0, + msg="{}, {}".format(ksize, sigma) + ) + + def test_invert(self): + self._test_adjust_fn( + F.invert, + F_pil.invert, + F_t.invert, + [{}], + tol=1.0, + agg_method="max" + ) + + def test_posterize(self): + self._test_adjust_fn( + F.posterize, + F_pil.posterize, + F_t.posterize, + [{"bits": bits} for bits in range(0, 8)], + tol=1.0, + agg_method="max", + dts=(None,) + ) + + def test_solarize(self): + self._test_adjust_fn( + F.solarize, + F_pil.solarize, + F_t.solarize, + [{"threshold": threshold} for threshold in [0, 64, 128, 192, 255]], + tol=1.0, + agg_method="max", + dts=(None,) + ) + self._test_adjust_fn( + F.solarize, + lambda img, threshold: F_pil.solarize(img, 255 * threshold), + F_t.solarize, + [{"threshold": threshold} for threshold in [0.0, 0.25, 0.5, 0.75, 1.0]], + tol=1.0, + agg_method="max", + dts=(torch.float32, torch.float64) + ) + + def test_adjust_sharpness(self): + self._test_adjust_fn( + F.adjust_sharpness, + F_pil.adjust_sharpness, + F_t.adjust_sharpness, + [{"sharpness_factor": f} for f in [0.2, 0.5, 1.0, 1.5, 2.0]] + ) + + def test_autocontrast(self): + self._test_adjust_fn( + F.autocontrast, + F_pil.autocontrast, + F_t.autocontrast, + [{}], + tol=1.0, + agg_method="max" + ) + + def test_equalize(self): + torch.set_deterministic(False) + self._test_adjust_fn( + F.equalize, + F_pil.equalize, + F_t.equalize, + [{}], + tol=1.0, + agg_method="max", + dts=(None,) + ) + + +@unittest.skipIf(not torch.cuda.is_available(), reason="Skip if no CUDA device") +class CUDATester(Tester): + + def setUp(self): + self.device = "cuda" + + def test_scale_channel(self): + """Make sure that _scale_channel gives the same results on CPU and GPU as + histc or bincount are used depending on the device. + """ + # TODO: when # https://github.com/pytorch/pytorch/issues/53194 is fixed, + # only use bincount and remove that test. + size = (1_000,) + img_chan = torch.randint(0, 256, size=size).to('cpu') + scaled_cpu = F_t._scale_channel(img_chan) + scaled_cuda = F_t._scale_channel(img_chan.to('cuda')) + self.assertTrue(scaled_cpu.equal(scaled_cuda.to('cpu'))) if __name__ == '__main__': diff --git a/test/test_hub.py b/test/test_hub.py index 4ae9e51021b..29ae90014d1 100644 --- a/test/test_hub.py +++ b/test/test_hub.py @@ -13,7 +13,7 @@ def sum_of_model_parameters(model): return s -SUM_OF_PRETRAINED_RESNET18_PARAMS = -12703.99609375 +SUM_OF_PRETRAINED_RESNET18_PARAMS = -12703.9931640625 @unittest.skipIf('torchvision' in sys.modules, @@ -31,8 +31,9 @@ def test_load_from_github(self): 'resnet18', pretrained=True, progress=False) - self.assertEqual(sum_of_model_parameters(hub_model).item(), - SUM_OF_PRETRAINED_RESNET18_PARAMS) + self.assertAlmostEqual(sum_of_model_parameters(hub_model).item(), + SUM_OF_PRETRAINED_RESNET18_PARAMS, + places=2) def test_set_dir(self): temp_dir = tempfile.gettempdir() @@ -42,8 +43,9 @@ def test_set_dir(self): 'resnet18', pretrained=True, progress=False) - self.assertEqual(sum_of_model_parameters(hub_model).item(), - SUM_OF_PRETRAINED_RESNET18_PARAMS) + self.assertAlmostEqual(sum_of_model_parameters(hub_model).item(), + SUM_OF_PRETRAINED_RESNET18_PARAMS, + places=2) self.assertTrue(os.path.exists(temp_dir + '/pytorch_vision_master')) shutil.rmtree(temp_dir + '/pytorch_vision_master') diff --git a/test/test_image.py b/test/test_image.py new file mode 100644 index 00000000000..ebc9a221f6d --- /dev/null +++ b/test/test_image.py @@ -0,0 +1,282 @@ +import glob +import io +import os +import unittest + +import numpy as np +import torch +from PIL import Image +from common_utils import get_tmp_dir + +from torchvision.io.image import ( + decode_png, decode_jpeg, encode_jpeg, write_jpeg, decode_image, read_file, + encode_png, write_png, write_file, ImageReadMode) + +IMAGE_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets") +FAKEDATA_DIR = os.path.join(IMAGE_ROOT, "fakedata") +IMAGE_DIR = os.path.join(FAKEDATA_DIR, "imagefolder") +DAMAGED_JPEG = os.path.join(IMAGE_ROOT, 'damaged_jpeg') +ENCODE_JPEG = os.path.join(IMAGE_ROOT, "encode_jpeg") + + +def get_images(directory, img_ext): + assert os.path.isdir(directory) + image_paths = glob.glob(directory + f'/**/*{img_ext}', recursive=True) + for path in image_paths: + if path.split(os.sep)[-2] not in ['damaged_jpeg', 'jpeg_write']: + yield path + + +def pil_read_image(img_path): + with Image.open(img_path) as img: + return torch.from_numpy(np.array(img)) + + +def normalize_dimensions(img_pil): + if len(img_pil.shape) == 3: + img_pil = img_pil.permute(2, 0, 1) + else: + img_pil = img_pil.unsqueeze(0) + return img_pil + + +class ImageTester(unittest.TestCase): + def test_decode_jpeg(self): + conversion = [(None, ImageReadMode.UNCHANGED), ("L", ImageReadMode.GRAY), ("RGB", ImageReadMode.RGB)] + for img_path in get_images(IMAGE_ROOT, ".jpg"): + for pil_mode, mode in conversion: + with Image.open(img_path) as img: + is_cmyk = img.mode == "CMYK" + if pil_mode is not None: + if is_cmyk: + # libjpeg does not support the conversion + continue + img = img.convert(pil_mode) + img_pil = torch.from_numpy(np.array(img)) + if is_cmyk: + # flip the colors to match libjpeg + img_pil = 255 - img_pil + + img_pil = normalize_dimensions(img_pil) + data = read_file(img_path) + img_ljpeg = decode_image(data, mode=mode) + + # Permit a small variation on pixel values to account for implementation + # differences between Pillow and LibJPEG. + abs_mean_diff = (img_ljpeg.type(torch.float32) - img_pil).abs().mean().item() + self.assertTrue(abs_mean_diff < 2) + + with self.assertRaisesRegex(RuntimeError, "Expected a non empty 1-dimensional tensor"): + decode_jpeg(torch.empty((100, 1), dtype=torch.uint8)) + + with self.assertRaisesRegex(RuntimeError, "Expected a torch.uint8 tensor"): + decode_jpeg(torch.empty((100,), dtype=torch.float16)) + + with self.assertRaises(RuntimeError): + decode_jpeg(torch.empty((100), dtype=torch.uint8)) + + def test_damaged_images(self): + # Test image with bad Huffman encoding (should not raise) + bad_huff = read_file(os.path.join(DAMAGED_JPEG, 'bad_huffman.jpg')) + try: + _ = decode_jpeg(bad_huff) + except RuntimeError: + self.assertTrue(False) + + # Truncated images should raise an exception + truncated_images = glob.glob( + os.path.join(DAMAGED_JPEG, 'corrupt*.jpg')) + for image_path in truncated_images: + data = read_file(image_path) + with self.assertRaises(RuntimeError): + decode_jpeg(data) + + def test_encode_jpeg(self): + for img_path in get_images(ENCODE_JPEG, ".jpg"): + dirname = os.path.dirname(img_path) + filename, _ = os.path.splitext(os.path.basename(img_path)) + write_folder = os.path.join(dirname, 'jpeg_write') + expected_file = os.path.join( + write_folder, '{0}_pil.jpg'.format(filename)) + img = decode_jpeg(read_file(img_path)) + + with open(expected_file, 'rb') as f: + pil_bytes = f.read() + pil_bytes = torch.as_tensor(list(pil_bytes), dtype=torch.uint8) + for src_img in [img, img.contiguous()]: + # PIL sets jpeg quality to 75 by default + jpeg_bytes = encode_jpeg(src_img, quality=75) + self.assertTrue(jpeg_bytes.equal(pil_bytes)) + + with self.assertRaisesRegex( + RuntimeError, "Input tensor dtype should be uint8"): + encode_jpeg(torch.empty((3, 100, 100), dtype=torch.float32)) + + with self.assertRaisesRegex( + ValueError, "Image quality should be a positive number " + "between 1 and 100"): + encode_jpeg(torch.empty((3, 100, 100), dtype=torch.uint8), quality=-1) + + with self.assertRaisesRegex( + ValueError, "Image quality should be a positive number " + "between 1 and 100"): + encode_jpeg(torch.empty((3, 100, 100), dtype=torch.uint8), quality=101) + + with self.assertRaisesRegex( + RuntimeError, "The number of channels should be 1 or 3, got: 5"): + encode_jpeg(torch.empty((5, 100, 100), dtype=torch.uint8)) + + with self.assertRaisesRegex( + RuntimeError, "Input data should be a 3-dimensional tensor"): + encode_jpeg(torch.empty((1, 3, 100, 100), dtype=torch.uint8)) + + with self.assertRaisesRegex( + RuntimeError, "Input data should be a 3-dimensional tensor"): + encode_jpeg(torch.empty((100, 100), dtype=torch.uint8)) + + def test_write_jpeg(self): + with get_tmp_dir() as d: + for img_path in get_images(ENCODE_JPEG, ".jpg"): + data = read_file(img_path) + img = decode_jpeg(data) + + basedir = os.path.dirname(img_path) + filename, _ = os.path.splitext(os.path.basename(img_path)) + torch_jpeg = os.path.join( + d, '{0}_torch.jpg'.format(filename)) + pil_jpeg = os.path.join( + basedir, 'jpeg_write', '{0}_pil.jpg'.format(filename)) + + write_jpeg(img, torch_jpeg, quality=75) + + with open(torch_jpeg, 'rb') as f: + torch_bytes = f.read() + + with open(pil_jpeg, 'rb') as f: + pil_bytes = f.read() + + self.assertEqual(torch_bytes, pil_bytes) + + def test_decode_png(self): + conversion = [(None, ImageReadMode.UNCHANGED), ("L", ImageReadMode.GRAY), ("LA", ImageReadMode.GRAY_ALPHA), + ("RGB", ImageReadMode.RGB), ("RGBA", ImageReadMode.RGB_ALPHA)] + for img_path in get_images(FAKEDATA_DIR, ".png"): + for pil_mode, mode in conversion: + with Image.open(img_path) as img: + if pil_mode is not None: + img = img.convert(pil_mode) + img_pil = torch.from_numpy(np.array(img)) + + img_pil = normalize_dimensions(img_pil) + data = read_file(img_path) + img_lpng = decode_image(data, mode=mode) + + tol = 0 if conversion is None else 1 + self.assertTrue(img_lpng.allclose(img_pil, atol=tol)) + + with self.assertRaises(RuntimeError): + decode_png(torch.empty((), dtype=torch.uint8)) + with self.assertRaises(RuntimeError): + decode_png(torch.randint(3, 5, (300,), dtype=torch.uint8)) + + def test_encode_png(self): + for img_path in get_images(IMAGE_DIR, '.png'): + pil_image = Image.open(img_path) + img_pil = torch.from_numpy(np.array(pil_image)) + img_pil = img_pil.permute(2, 0, 1) + png_buf = encode_png(img_pil, compression_level=6) + + rec_img = Image.open(io.BytesIO(bytes(png_buf.tolist()))) + rec_img = torch.from_numpy(np.array(rec_img)) + rec_img = rec_img.permute(2, 0, 1) + + self.assertTrue(img_pil.equal(rec_img)) + + with self.assertRaisesRegex( + RuntimeError, "Input tensor dtype should be uint8"): + encode_png(torch.empty((3, 100, 100), dtype=torch.float32)) + + with self.assertRaisesRegex( + RuntimeError, "Compression level should be between 0 and 9"): + encode_png(torch.empty((3, 100, 100), dtype=torch.uint8), + compression_level=-1) + + with self.assertRaisesRegex( + RuntimeError, "Compression level should be between 0 and 9"): + encode_png(torch.empty((3, 100, 100), dtype=torch.uint8), + compression_level=10) + + with self.assertRaisesRegex( + RuntimeError, "The number of channels should be 1 or 3, got: 5"): + encode_png(torch.empty((5, 100, 100), dtype=torch.uint8)) + + def test_write_png(self): + with get_tmp_dir() as d: + for img_path in get_images(IMAGE_DIR, '.png'): + pil_image = Image.open(img_path) + img_pil = torch.from_numpy(np.array(pil_image)) + img_pil = img_pil.permute(2, 0, 1) + + filename, _ = os.path.splitext(os.path.basename(img_path)) + torch_png = os.path.join(d, '{0}_torch.png'.format(filename)) + write_png(img_pil, torch_png, compression_level=6) + saved_image = torch.from_numpy(np.array(Image.open(torch_png))) + saved_image = saved_image.permute(2, 0, 1) + + self.assertTrue(img_pil.equal(saved_image)) + + def test_read_file(self): + with get_tmp_dir() as d: + fname, content = 'test1.bin', b'TorchVision\211\n' + fpath = os.path.join(d, fname) + with open(fpath, 'wb') as f: + f.write(content) + + data = read_file(fpath) + expected = torch.tensor(list(content), dtype=torch.uint8) + self.assertTrue(data.equal(expected)) + os.unlink(fpath) + + with self.assertRaisesRegex( + RuntimeError, "No such file or directory: 'tst'"): + read_file('tst') + + def test_read_file_non_ascii(self): + with get_tmp_dir() as d: + fname, content = '日本語(Japanese).bin', b'TorchVision\211\n' + fpath = os.path.join(d, fname) + with open(fpath, 'wb') as f: + f.write(content) + + data = read_file(fpath) + expected = torch.tensor(list(content), dtype=torch.uint8) + self.assertTrue(data.equal(expected)) + os.unlink(fpath) + + def test_write_file(self): + with get_tmp_dir() as d: + fname, content = 'test1.bin', b'TorchVision\211\n' + fpath = os.path.join(d, fname) + content_tensor = torch.tensor(list(content), dtype=torch.uint8) + write_file(fpath, content_tensor) + + with open(fpath, 'rb') as f: + saved_content = f.read() + self.assertEqual(content, saved_content) + os.unlink(fpath) + + def test_write_file_non_ascii(self): + with get_tmp_dir() as d: + fname, content = '日本語(Japanese).bin', b'TorchVision\211\n' + fpath = os.path.join(d, fname) + content_tensor = torch.tensor(list(content), dtype=torch.uint8) + write_file(fpath, content_tensor) + + with open(fpath, 'rb') as f: + saved_content = f.read() + self.assertEqual(content, saved_content) + os.unlink(fpath) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_internet.py b/test/test_internet.py new file mode 100644 index 00000000000..05496752c7f --- /dev/null +++ b/test/test_internet.py @@ -0,0 +1,71 @@ +"""This file should contain all tests that need access to the internet (apart +from the ones in test_datasets_download.py) + +We want to bundle all internet-related tests in one file, so the file can be +cleanly ignored in FB internal test infra. +""" + +import os +import unittest +import unittest.mock +import warnings +from urllib.error import URLError + +import torchvision.datasets.utils as utils +from common_utils import get_tmp_dir + + +class DatasetUtilsTester(unittest.TestCase): + + def test_get_redirect_url(self): + url = "http://www.vision.caltech.edu/visipedia-data/CUB-200-2011/CUB_200_2011.tgz" + expected = "https://drive.google.com/file/d/1hbzc_P1FuxMkcabkgn9ZKinBwW683j45/view" + + actual = utils._get_redirect_url(url) + assert actual == expected + + def test_get_redirect_url_max_hops_exceeded(self): + url = "http://www.vision.caltech.edu/visipedia-data/CUB-200-2011/CUB_200_2011.tgz" + with self.assertRaises(RecursionError): + utils._get_redirect_url(url, max_hops=0) + + def test_download_url(self): + with get_tmp_dir() as temp_dir: + url = "http://github.com/pytorch/vision/archive/master.zip" + try: + utils.download_url(url, temp_dir) + self.assertFalse(len(os.listdir(temp_dir)) == 0) + except URLError: + msg = "could not download test file '{}'".format(url) + warnings.warn(msg, RuntimeWarning) + raise unittest.SkipTest(msg) + + def test_download_url_retry_http(self): + with get_tmp_dir() as temp_dir: + url = "https://github.com/pytorch/vision/archive/master.zip" + try: + utils.download_url(url, temp_dir) + self.assertFalse(len(os.listdir(temp_dir)) == 0) + except URLError: + msg = "could not download test file '{}'".format(url) + warnings.warn(msg, RuntimeWarning) + raise unittest.SkipTest(msg) + + def test_download_url_dont_exist(self): + with get_tmp_dir() as temp_dir: + url = "http://github.com/pytorch/vision/archive/this_doesnt_exist.zip" + with self.assertRaises(URLError): + utils.download_url(url, temp_dir) + + @unittest.mock.patch("torchvision.datasets.utils.download_file_from_google_drive") + def test_download_url_dispatch_download_from_google_drive(self, mock): + url = "https://drive.google.com/file/d/1hbzc_P1FuxMkcabkgn9ZKinBwW683j45/view" + + id = "1hbzc_P1FuxMkcabkgn9ZKinBwW683j45" + filename = "filename" + md5 = "md5" + + with get_tmp_dir() as root: + utils.download_url(url, root, filename, md5) + + mock.assert_called_once_with(id, root, filename, md5) diff --git a/test/test_io.py b/test/test_io.py index db292b73e0f..7d752bdbcf7 100644 --- a/test/test_io.py +++ b/test/test_io.py @@ -1,20 +1,16 @@ import os import contextlib +import sys import tempfile import torch -import torchvision.datasets.utils as utils import torchvision.io as io from torchvision import get_video_backend import unittest -import sys import warnings +from urllib.error import URLError from common_utils import get_tmp_dir -if sys.version_info < (3,): - from urllib2 import URLError -else: - from urllib.error import URLError try: import av @@ -24,6 +20,9 @@ av = None +VIDEO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "videos") + + def _create_video_frames(num_frames, height, width): y, x = torch.meshgrid(torch.linspace(-2, 2, height), torch.linspace(-2, 2, width)) data = [] @@ -58,15 +57,16 @@ def temp_video(num_frames, height, width, fps, lossless=False, video_codec=None, data = _create_video_frames(num_frames, height, width) with tempfile.NamedTemporaryFile(suffix='.mp4') as f: + f.close() io.write_video(f.name, data, fps=fps, video_codec=video_codec, options=options) yield f.name, data + os.unlink(f.name) @unittest.skipIf(get_video_backend() != "pyav" and not io._HAS_VIDEO_OPT, "video_reader backend not available") @unittest.skipIf(av is None, "PyAV unavailable") -@unittest.skipIf(sys.platform == 'win32', 'temporarily disabled on Windows') -class Tester(unittest.TestCase): +class TestIO(unittest.TestCase): # compression adds artifacts, thus we add a tolerance of # 6 in 0-255 range TOLERANCE = 6 @@ -81,8 +81,8 @@ def test_write_read_video(self): def test_probe_video_from_file(self): with temp_video(10, 300, 300, 5) as (f_name, data): video_info = io._probe_video_from_file(f_name) - self.assertAlmostEqual(video_info["video_duration"], 2, delta=0.1) - self.assertAlmostEqual(video_info["video_fps"], 5, delta=0.1) + self.assertAlmostEqual(video_info.video_duration, 2, delta=0.1) + self.assertAlmostEqual(video_info.video_fps, 5, delta=0.1) @unittest.skipIf(not io._HAS_VIDEO_OPT, "video_reader backend is not chosen") def test_probe_video_from_memory(self): @@ -90,8 +90,8 @@ def test_probe_video_from_memory(self): with open(f_name, "rb") as fp: filebuffer = fp.read() video_info = io._probe_video_from_memory(filebuffer) - self.assertAlmostEqual(video_info["video_duration"], 2, delta=0.1) - self.assertAlmostEqual(video_info["video_fps"], 5, delta=0.1) + self.assertAlmostEqual(video_info.video_duration, 2, delta=0.1) + self.assertAlmostEqual(video_info.video_fps, 5, delta=0.1) def test_read_timestamps(self): with temp_video(10, 300, 300, 5) as (f_name, data): @@ -106,15 +106,16 @@ def test_read_timestamps(self): expected_pts = [i * pts_step for i in range(num_frames)] self.assertEqual(pts, expected_pts) + container.close() def test_read_partial_video(self): with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data): pts, _ = io.read_video_timestamps(f_name) for start in range(5): - for l in range(1, 4): - lv, _, _ = io.read_video(f_name, pts[start], pts[start + l - 1]) - s_data = data[start:(start + l)] - self.assertEqual(len(lv), l) + for offset in range(1, 4): + lv, _, _ = io.read_video(f_name, pts[start], pts[start + offset - 1]) + s_data = data[start:(start + offset)] + self.assertEqual(len(lv), offset) self.assertTrue(s_data.equal(lv)) if get_video_backend() == "pyav": @@ -130,10 +131,10 @@ def test_read_partial_video_bframes(self): with temp_video(100, 300, 300, 5, options=options) as (f_name, data): pts, _ = io.read_video_timestamps(f_name) for start in range(0, 80, 20): - for l in range(1, 4): - lv, _, _ = io.read_video(f_name, pts[start], pts[start + l - 1]) - s_data = data[start:(start + l)] - self.assertEqual(len(lv), l) + for offset in range(1, 4): + lv, _, _ = io.read_video(f_name, pts[start], pts[start + offset - 1]) + s_data = data[start:(start + offset)] + self.assertEqual(len(lv), offset) self.assertTrue((s_data.float() - lv.float()).abs().max() < self.TOLERANCE) lv, _, _ = io.read_video(f_name, pts[4] + 1, pts[7]) @@ -146,20 +147,12 @@ def test_read_partial_video_bframes(self): self.assertTrue((data[5:8].float() - lv.float()).abs().max() < self.TOLERANCE) def test_read_packed_b_frames_divx_file(self): - with get_tmp_dir() as temp_dir: - name = "hmdb51_Turnk_r_Pippi_Michel_cartwheel_f_cm_np2_le_med_6.avi" - f_name = os.path.join(temp_dir, name) - url = "https://download.pytorch.org/vision_tests/io/" + name - try: - utils.download_url(url, temp_dir) - pts, fps = io.read_video_timestamps(f_name) - - self.assertEqual(pts, sorted(pts)) - self.assertEqual(fps, 30) - except URLError: - msg = "could not download test file '{}'".format(url) - warnings.warn(msg, RuntimeWarning) - raise unittest.SkipTest(msg) + name = "hmdb51_Turnk_r_Pippi_Michel_cartwheel_f_cm_np2_le_med_6.avi" + f_name = os.path.join(VIDEO_DIR, name) + pts, fps = io.read_video_timestamps(f_name) + + self.assertEqual(pts, sorted(pts)) + self.assertEqual(fps, 30) def test_read_timestamps_from_packet(self): with temp_video(10, 300, 300, 5, video_codec='mpeg4') as (f_name, data): @@ -176,6 +169,7 @@ def test_read_timestamps_from_packet(self): expected_pts = [i * pts_step for i in range(num_frames)] self.assertEqual(pts, expected_pts) + container.close() def test_read_video_pts_unit_sec(self): with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data): @@ -196,16 +190,17 @@ def test_read_timestamps_pts_unit_sec(self): expected_pts = [i * pts_step * stream.time_base for i in range(num_frames)] self.assertEqual(pts, expected_pts) + container.close() def test_read_partial_video_pts_unit_sec(self): with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data): pts, _ = io.read_video_timestamps(f_name, pts_unit='sec') for start in range(5): - for l in range(1, 4): - lv, _, _ = io.read_video(f_name, pts[start], pts[start + l - 1], pts_unit='sec') - s_data = data[start:(start + l)] - self.assertEqual(len(lv), l) + for offset in range(1, 4): + lv, _, _ = io.read_video(f_name, pts[start], pts[start + offset - 1], pts_unit='sec') + s_data = data[start:(start + offset)] + self.assertEqual(len(lv), offset) self.assertTrue(s_data.equal(lv)) container = av.open(f_name) @@ -218,6 +213,7 @@ def test_read_partial_video_pts_unit_sec(self): # when the given start pts is not matching any frame pts self.assertEqual(len(lv), 4) self.assertTrue(data[4:8].equal(lv)) + container.close() def test_read_video_corrupted_file(self): with tempfile.NamedTemporaryFile(suffix='.mp4') as f: @@ -236,6 +232,7 @@ def test_read_video_timestamps_corrupted_file(self): self.assertEqual(video_pts, []) self.assertIs(video_fps, None) + @unittest.skip("Temporarily disabled due to new pyav") def test_read_video_partially_corrupted_file(self): with temp_video(5, 4, 4, 5, lossless=True) as (f_name, data): with open(f_name, 'r+b') as f: @@ -258,6 +255,39 @@ def test_read_video_partially_corrupted_file(self): # and the last few frames are wrong self.assertFalse(video.equal(data)) + @unittest.skipIf(sys.platform == 'win32', 'temporarily disabled on Windows') + def test_write_video_with_audio(self): + f_name = os.path.join(VIDEO_DIR, "R6llTwEh07w.mp4") + video_tensor, audio_tensor, info = io.read_video(f_name, pts_unit="sec") + + with get_tmp_dir() as tmpdir: + out_f_name = os.path.join(tmpdir, "testing.mp4") + io.video.write_video( + out_f_name, + video_tensor, + round(info["video_fps"]), + video_codec="libx264rgb", + options={'crf': '0'}, + audio_array=audio_tensor, + audio_fps=info["audio_fps"], + audio_codec="aac", + ) + + out_video_tensor, out_audio_tensor, out_info = io.read_video( + out_f_name, pts_unit="sec" + ) + + self.assertEqual(info["video_fps"], out_info["video_fps"]) + self.assertTrue(video_tensor.equal(out_video_tensor)) + + audio_stream = av.open(f_name).streams.audio[0] + out_audio_stream = av.open(out_f_name).streams.audio[0] + + self.assertEqual(info["audio_fps"], out_info["audio_fps"]) + self.assertEqual(audio_stream.rate, out_audio_stream.rate) + self.assertAlmostEqual(audio_stream.frames, out_audio_stream.frames, delta=1) + self.assertEqual(audio_stream.frame_size, out_audio_stream.frame_size) + # TODO add tests for audio diff --git a/test/test_io_opt.py b/test/test_io_opt.py index 1ad3dea8fa2..87698b34624 100644 --- a/test/test_io_opt.py +++ b/test/test_io_opt.py @@ -3,7 +3,8 @@ import test_io -set_video_backend('video_reader') +# Disabling the video backend switching temporarily +# set_video_backend('video_reader') if __name__ == '__main__': diff --git a/test/test_models.py b/test/test_models.py index c70ef6830bf..90855fb71df 100644 --- a/test/test_models.py +++ b/test/test_models.py @@ -1,18 +1,13 @@ -from common_utils import TestCase, map_nested_tensor_object +from common_utils import TestCase, map_nested_tensor_object, freeze_rng_state, set_rng_seed from collections import OrderedDict from itertools import product +import functools +import operator import torch -import numpy as np +import torch.nn as nn from torchvision import models import unittest -import traceback -import random - - -def set_rng_seed(seed): - torch.manual_seed(seed) - random.seed(seed) - np.random.seed(seed) +import warnings def get_available_classification_models(): @@ -35,116 +30,247 @@ def get_available_video_models(): return [k for k, v in models.video.__dict__.items() if callable(v) and k[0].lower() == k[0] and k[0] != "_"] -# models that are in torch hub, as well as r3d_18. we tried testing all models -# but the test was too slow. not included are detection models, because -# they are not yet supported in JIT. -script_test_models = [ +# If 'unwrapper' is provided it will be called with the script model outputs +# before they are compared to the eager model outputs. This is useful if the +# model outputs are different between TorchScript / Eager mode +script_model_unwrapper = { + 'googlenet': lambda x: x.logits, + 'inception_v3': lambda x: x.logits, + "fasterrcnn_resnet50_fpn": lambda x: x[1], + "fasterrcnn_mobilenet_v3_large_fpn": lambda x: x[1], + "fasterrcnn_mobilenet_v3_large_320_fpn": lambda x: x[1], + "maskrcnn_resnet50_fpn": lambda x: x[1], + "keypointrcnn_resnet50_fpn": lambda x: x[1], + "retinanet_resnet50_fpn": lambda x: x[1], +} + + +# The following models exhibit flaky numerics under autocast in _test_*_model harnesses. +# This may be caused by the harness environment (e.g. num classes, input initialization +# via torch.rand), and does not prove autocast is unsuitable when training with real data +# (autocast has been used successfully with real data for some of these models). +# TODO: investigate why autocast numerics are flaky in the harnesses. +# +# For the following models, _test_*_model harnesses skip numerical checks on outputs when +# trying autocast. However, they still try an autocasted forward pass, so they still ensure +# autocast coverage suffices to prevent dtype errors in each model. +autocast_flaky_numerics = ( + "inception_v3", + "resnet101", + "resnet152", + "wide_resnet101_2", + "deeplabv3_resnet50", "deeplabv3_resnet101", - "mobilenet_v2", - "resnext50_32x4d", + "deeplabv3_mobilenet_v3_large", + "fcn_resnet50", "fcn_resnet101", - "googlenet", - "densenet121", - "resnet18", - "alexnet", - "shufflenet_v2_x1_0", - "squeezenet1_0", - "vgg11", - "inception_v3", - 'r3d_18', -] + "lraspp_mobilenet_v3_large", + "maskrcnn_resnet50_fpn", +) class ModelTester(TestCase): - def check_script(self, model, name): - if name not in script_test_models: - return - scriptable = True - msg = "" - try: - torch.jit.script(model) - except Exception as e: - tb = traceback.format_exc() - scriptable = False - msg = str(e) + str(tb) - self.assertTrue(scriptable, msg) - - def _test_classification_model(self, name, input_shape): + def _test_classification_model(self, name, input_shape, dev): + set_rng_seed(0) # passing num_class equal to a number other than 1000 helps in making the test # more enforcing in nature - set_rng_seed(0) model = models.__dict__[name](num_classes=50) - self.check_script(model, name) - model.eval() - x = torch.rand(input_shape) + model.eval().to(device=dev) + # RNG always on CPU, to ensure x in cuda tests is bitwise identical to x in cpu tests + x = torch.rand(input_shape).to(device=dev) out = model(x) - self.assertExpected(out, rtol=1e-2, atol=0.) + self.assertExpected(out.cpu(), prec=0.1, strip_suffix=f"_{dev}") self.assertEqual(out.shape[-1], 50) + self.check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(name, None)) - def _test_segmentation_model(self, name): - # passing num_class equal to a number other than 1000 helps in making the test - # more enforcing in nature - model = models.segmentation.__dict__[name](num_classes=50, pretrained_backbone=False) - self.check_script(model, name) - model.eval() - input_shape = (1, 3, 300, 300) - x = torch.rand(input_shape) - out = model(x) - self.assertEqual(tuple(out["out"].shape), (1, 50, 300, 300)) + if dev == torch.device("cuda"): + with torch.cuda.amp.autocast(): + out = model(x) + # See autocast_flaky_numerics comment at top of file. + if name not in autocast_flaky_numerics: + self.assertExpected(out.cpu(), prec=0.1, strip_suffix=f"_{dev}") + self.assertEqual(out.shape[-1], 50) - def _test_detection_model(self, name): + def _test_segmentation_model(self, name, dev): set_rng_seed(0) - model = models.detection.__dict__[name](num_classes=50, pretrained_backbone=False) - self.check_script(model, name) - model.eval() + # passing num_classes equal to a number other than 21 helps in making the test's + # expected file size smaller + model = models.segmentation.__dict__[name](num_classes=10, pretrained_backbone=False) + model.eval().to(device=dev) + input_shape = (1, 3, 32, 32) + # RNG always on CPU, to ensure x in cuda tests is bitwise identical to x in cpu tests + x = torch.rand(input_shape).to(device=dev) + out = model(x)["out"] + + def check_out(out): + prec = 0.01 + strip_suffix = f"_{dev}" + try: + # We first try to assert the entire output if possible. This is not + # only the best way to assert results but also handles the cases + # where we need to create a new expected result. + self.assertExpected(out.cpu(), prec=prec, strip_suffix=strip_suffix) + except AssertionError: + # Unfortunately some segmentation models are flaky with autocast + # so instead of validating the probability scores, check that the class + # predictions match. + expected_file = self._get_expected_file(strip_suffix=strip_suffix) + expected = torch.load(expected_file) + self.assertEqual(out.argmax(dim=1), expected.argmax(dim=1), prec=prec) + return False # Partial validation performed + + return True # Full validation performed + + full_validation = check_out(out) + + self.check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(name, None)) + + if dev == torch.device("cuda"): + with torch.cuda.amp.autocast(): + out = model(x)["out"] + # See autocast_flaky_numerics comment at top of file. + if name not in autocast_flaky_numerics: + full_validation &= check_out(out) + + if not full_validation: + msg = "The output of {} could only be partially validated. " \ + "This is likely due to unit-test flakiness, but you may " \ + "want to do additional manual checks if you made " \ + "significant changes to the codebase.".format(self._testMethodName) + warnings.warn(msg, RuntimeWarning) + raise unittest.SkipTest(msg) + + def _test_detection_model(self, name, dev): + set_rng_seed(0) + kwargs = {} + if "retinanet" in name: + # Reduce the default threshold to ensure the returned boxes are not empty. + kwargs["score_thresh"] = 0.01 + elif "fasterrcnn_mobilenet_v3_large" in name: + kwargs["box_score_thresh"] = 0.02076 + if "fasterrcnn_mobilenet_v3_large_320_fpn" in name: + kwargs["rpn_pre_nms_top_n_test"] = 1000 + kwargs["rpn_post_nms_top_n_test"] = 1000 + model = models.detection.__dict__[name](num_classes=50, pretrained_backbone=False, **kwargs) + model.eval().to(device=dev) input_shape = (3, 300, 300) - x = torch.rand(input_shape) + # RNG always on CPU, to ensure x in cuda tests is bitwise identical to x in cpu tests + x = torch.rand(input_shape).to(device=dev) model_input = [x] out = model(model_input) self.assertIs(model_input[0], x) - self.assertEqual(len(out), 1) - def subsample_tensor(tensor): - num_elems = tensor.numel() - num_samples = 20 - if num_elems <= num_samples: - return tensor - - flat_tensor = tensor.flatten() - ith_index = num_elems // num_samples - return flat_tensor[ith_index - 1::ith_index] - - def compute_mean_std(tensor): - # can't compute mean of integral tensor - tensor = tensor.to(torch.double) - mean = torch.mean(tensor) - std = torch.std(tensor) - return {"mean": mean, "std": std} - - # maskrcnn_resnet_50_fpn numerically unstable across platforms, so for now - # compare results with mean and std - if name == "maskrcnn_resnet50_fpn": - test_value = map_nested_tensor_object(out, tensor_map_fn=compute_mean_std) - # mean values are small, use large rtol - self.assertExpected(test_value, rtol=.01, atol=.01) - else: - self.assertExpected(map_nested_tensor_object(out, tensor_map_fn=subsample_tensor)) + def check_out(out): + self.assertEqual(len(out), 1) + + def compact(tensor): + size = tensor.size() + elements_per_sample = functools.reduce(operator.mul, size[1:], 1) + if elements_per_sample > 30: + return compute_mean_std(tensor) + else: + return subsample_tensor(tensor) + + def subsample_tensor(tensor): + num_elems = tensor.size(0) + num_samples = 20 + if num_elems <= num_samples: + return tensor + + ith_index = num_elems // num_samples + return tensor[ith_index - 1::ith_index] + + def compute_mean_std(tensor): + # can't compute mean of integral tensor + tensor = tensor.to(torch.double) + mean = torch.mean(tensor) + std = torch.std(tensor) + return {"mean": mean, "std": std} + + output = map_nested_tensor_object(out, tensor_map_fn=compact) + prec = 0.01 + strip_suffix = f"_{dev}" + try: + # We first try to assert the entire output if possible. This is not + # only the best way to assert results but also handles the cases + # where we need to create a new expected result. + self.assertExpected(output, prec=prec, strip_suffix=strip_suffix) + except AssertionError: + # Unfortunately detection models are flaky due to the unstable sort + # in NMS. If matching across all outputs fails, use the same approach + # as in NMSTester.test_nms_cuda to see if this is caused by duplicate + # scores. + expected_file = self._get_expected_file(strip_suffix=strip_suffix) + expected = torch.load(expected_file) + self.assertEqual(output[0]["scores"], expected[0]["scores"], prec=prec) + + # Note: Fmassa proposed turning off NMS by adapting the threshold + # and then using the Hungarian algorithm as in DETR to find the + # best match between output and expected boxes and eliminate some + # of the flakiness. Worth exploring. + return False # Partial validation performed + + return True # Full validation performed + + full_validation = check_out(out) + self.check_jit_scriptable(model, ([x],), unwrapper=script_model_unwrapper.get(name, None)) + + if dev == torch.device("cuda"): + with torch.cuda.amp.autocast(): + out = model(model_input) + # See autocast_flaky_numerics comment at top of file. + if name not in autocast_flaky_numerics: + full_validation &= check_out(out) + + if not full_validation: + msg = "The output of {} could only be partially validated. " \ + "This is likely due to unit-test flakiness, but you may " \ + "want to do additional manual checks if you made " \ + "significant changes to the codebase.".format(self._testMethodName) + warnings.warn(msg, RuntimeWarning) + raise unittest.SkipTest(msg) + + def _test_detection_model_validation(self, name): + set_rng_seed(0) + model = models.detection.__dict__[name](num_classes=50, pretrained_backbone=False) + input_shape = (3, 300, 300) + x = [torch.rand(input_shape)] + + # validate that targets are present in training + self.assertRaises(ValueError, model, x) - self.assertTrue("boxes" in out[0]) - self.assertTrue("scores" in out[0]) - self.assertTrue("labels" in out[0]) + # validate type + targets = [{'boxes': 0.}] + self.assertRaises(ValueError, model, x, targets=targets) + + # validate boxes shape + for boxes in (torch.rand((4,)), torch.rand((1, 5))): + targets = [{'boxes': boxes}] + self.assertRaises(ValueError, model, x, targets=targets) + + # validate that no degenerate boxes are present + boxes = torch.tensor([[1, 3, 1, 4], [2, 4, 3, 4]]) + targets = [{'boxes': boxes}] + self.assertRaises(ValueError, model, x, targets=targets) - def _test_video_model(self, name): + def _test_video_model(self, name, dev): # the default input shape is # bs * num_channels * clip_len * h *w input_shape = (1, 3, 4, 112, 112) # test both basicblock and Bottleneck model = models.video.__dict__[name](num_classes=50) - self.check_script(model, name) - x = torch.rand(input_shape) + model.eval().to(device=dev) + # RNG always on CPU, to ensure x in cuda tests is bitwise identical to x in cpu tests + x = torch.rand(input_shape).to(device=dev) out = model(x) + self.check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(name, None)) self.assertEqual(out.shape[-1], 50) + if dev == torch.device("cuda"): + with torch.cuda.amp.autocast(): + out = model(x) + self.assertEqual(out.shape[-1], 50) + def _make_sliced_model(self, model, stop_layer): layers = OrderedDict() for name, layer in model.named_children(): @@ -161,9 +287,11 @@ def test_memory_efficient_densenet(self): for name in ['densenet121', 'densenet169', 'densenet201', 'densenet161']: model1 = models.__dict__[name](num_classes=50, memory_efficient=True) params = model1.state_dict() + num_params = sum([x.numel() for x in model1.parameters()]) model1.eval() out1 = model1(x) out1.sum().backward() + num_grad = sum([x.grad.numel() for x in model1.parameters() if x.grad is not None]) model2 = models.__dict__[name](num_classes=50, memory_efficient=False) model2.load_state_dict(params) @@ -172,6 +300,7 @@ def test_memory_efficient_densenet(self): max_diff = (out1 - out2).abs().max() + self.assertTrue(num_params == num_grad) self.assertTrue(max_diff < 1e-5) def test_resnet_dilation(self): @@ -185,13 +314,39 @@ def test_resnet_dilation(self): f = 2 ** sum(i) self.assertEqual(out.shape, (1, 2048, 7 * f, 7 * f)) - def test_mobilenetv2_residual_setting(self): + def test_mobilenet_v2_residual_setting(self): model = models.__dict__["mobilenet_v2"](inverted_residual_setting=[[1, 16, 1, 1], [6, 24, 2, 2]]) model.eval() x = torch.rand(1, 3, 224, 224) out = model(x) self.assertEqual(out.shape[-1], 1000) + def test_mobilenet_norm_layer(self): + for name in ["mobilenet_v2", "mobilenet_v3_large", "mobilenet_v3_small"]: + model = models.__dict__[name]() + self.assertTrue(any(isinstance(x, nn.BatchNorm2d) for x in model.modules())) + + def get_gn(num_channels): + return nn.GroupNorm(32, num_channels) + + model = models.__dict__[name](norm_layer=get_gn) + self.assertFalse(any(isinstance(x, nn.BatchNorm2d) for x in model.modules())) + self.assertTrue(any(isinstance(x, nn.GroupNorm) for x in model.modules())) + + def test_inception_v3_eval(self): + # replacement for models.inception_v3(pretrained=True) that does not download weights + kwargs = {} + kwargs['transform_input'] = True + kwargs['aux_logits'] = True + kwargs['init_weights'] = False + name = "inception_v3" + model = models.Inception3(**kwargs) + model.aux_logits = False + model.AuxLogits = None + model = model.eval() + x = torch.rand(1, 3, 299, 299) + self.check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(name, None)) + def test_fasterrcnn_double(self): model = models.detection.fasterrcnn_resnet50_fpn(num_classes=50, pretrained_backbone=False) model.double() @@ -206,43 +361,119 @@ def test_fasterrcnn_double(self): self.assertTrue("scores" in out[0]) self.assertTrue("labels" in out[0]) + def test_googlenet_eval(self): + # replacement for models.googlenet(pretrained=True) that does not download weights + kwargs = {} + kwargs['transform_input'] = True + kwargs['aux_logits'] = True + kwargs['init_weights'] = False + name = "googlenet" + model = models.GoogLeNet(**kwargs) + model.aux_logits = False + model.aux1 = None + model.aux2 = None + model = model.eval() + x = torch.rand(1, 3, 224, 224) + self.check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(name, None)) + + @unittest.skipIf(not torch.cuda.is_available(), 'needs GPU') + def test_fasterrcnn_switch_devices(self): + def checkOut(out): + self.assertEqual(len(out), 1) + self.assertTrue("boxes" in out[0]) + self.assertTrue("scores" in out[0]) + self.assertTrue("labels" in out[0]) + + model = models.detection.fasterrcnn_resnet50_fpn(num_classes=50, pretrained_backbone=False) + model.cuda() + model.eval() + input_shape = (3, 300, 300) + x = torch.rand(input_shape, device='cuda') + model_input = [x] + out = model(model_input) + self.assertIs(model_input[0], x) + + checkOut(out) + + with torch.cuda.amp.autocast(): + out = model(model_input) + + checkOut(out) + + # now switch to cpu and make sure it works + model.cpu() + x = x.cpu() + out_cpu = model([x]) + + checkOut(out_cpu) + + def test_generalizedrcnn_transform_repr(self): + + min_size, max_size = 224, 299 + image_mean = [0.485, 0.456, 0.406] + image_std = [0.229, 0.224, 0.225] + + t = models.detection.transform.GeneralizedRCNNTransform(min_size=min_size, + max_size=max_size, + image_mean=image_mean, + image_std=image_std) + + # Check integrity of object __repr__ attribute + expected_string = 'GeneralizedRCNNTransform(' + _indent = '\n ' + expected_string += '{0}Normalize(mean={1}, std={2})'.format(_indent, image_mean, image_std) + expected_string += '{0}Resize(min_size=({1},), max_size={2}, '.format(_indent, min_size, max_size) + expected_string += "mode='bilinear')\n)" + self.assertEqual(t.__repr__(), expected_string) + + +_devs = [torch.device("cpu"), torch.device("cuda")] if torch.cuda.is_available() else [torch.device("cpu")] + for model_name in get_available_classification_models(): - # for-loop bodies don't define scopes, so we have to save the variables - # we want to close over in some way - def do_test(self, model_name=model_name): - input_shape = (1, 3, 224, 224) - if model_name in ['inception_v3']: - input_shape = (1, 3, 299, 299) - self._test_classification_model(model_name, input_shape) + for dev in _devs: + # for-loop bodies don't define scopes, so we have to save the variables + # we want to close over in some way + def do_test(self, model_name=model_name, dev=dev): + input_shape = (1, 3, 224, 224) + if model_name in ['inception_v3']: + input_shape = (1, 3, 299, 299) + self._test_classification_model(model_name, input_shape, dev) - setattr(ModelTester, "test_" + model_name, do_test) + setattr(ModelTester, f"test_{model_name}_{dev}", do_test) for model_name in get_available_segmentation_models(): - # for-loop bodies don't define scopes, so we have to save the variables - # we want to close over in some way - def do_test(self, model_name=model_name): - self._test_segmentation_model(model_name) + for dev in _devs: + # for-loop bodies don't define scopes, so we have to save the variables + # we want to close over in some way + def do_test(self, model_name=model_name, dev=dev): + self._test_segmentation_model(model_name, dev) - setattr(ModelTester, "test_" + model_name, do_test) + setattr(ModelTester, f"test_{model_name}_{dev}", do_test) for model_name in get_available_detection_models(): - # for-loop bodies don't define scopes, so we have to save the variables - # we want to close over in some way - def do_test(self, model_name=model_name): - self._test_detection_model(model_name) + for dev in _devs: + # for-loop bodies don't define scopes, so we have to save the variables + # we want to close over in some way + def do_test(self, model_name=model_name, dev=dev): + self._test_detection_model(model_name, dev) - setattr(ModelTester, "test_" + model_name, do_test) + setattr(ModelTester, f"test_{model_name}_{dev}", do_test) + def do_validation_test(self, model_name=model_name): + self._test_detection_model_validation(model_name) + + setattr(ModelTester, "test_" + model_name + "_validation", do_validation_test) -for model_name in get_available_video_models(): - def do_test(self, model_name=model_name): - self._test_video_model(model_name) +for model_name in get_available_video_models(): + for dev in _devs: + def do_test(self, model_name=model_name, dev=dev): + self._test_video_model(model_name, dev) - setattr(ModelTester, "test_" + model_name, do_test) + setattr(ModelTester, f"test_{model_name}_{dev}", do_test) if __name__ == '__main__': unittest.main() diff --git a/test/test_models_detection_anchor_utils.py b/test/test_models_detection_anchor_utils.py new file mode 100644 index 00000000000..872a57c1365 --- /dev/null +++ b/test/test_models_detection_anchor_utils.py @@ -0,0 +1,61 @@ +from collections import OrderedDict +import torch +from common_utils import TestCase +from torchvision.models.detection.anchor_utils import AnchorGenerator +from torchvision.models.detection.image_list import ImageList + + +class Tester(TestCase): + def test_incorrect_anchors(self): + incorrect_sizes = ((2, 4, 8), (32, 8), ) + incorrect_aspects = (0.5, 1.0) + anc = AnchorGenerator(incorrect_sizes, incorrect_aspects) + image1 = torch.randn(3, 800, 800) + image_list = ImageList(image1, [(800, 800)]) + feature_maps = [torch.randn(1, 50)] + self.assertRaises(ValueError, anc, image_list, feature_maps) + + def _init_test_anchor_generator(self): + anchor_sizes = ((10,),) + aspect_ratios = ((1,),) + anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios) + + return anchor_generator + + def get_features(self, images): + s0, s1 = images.shape[-2:] + features = [torch.rand(2, 8, s0 // 5, s1 // 5)] + return features + + def test_anchor_generator(self): + images = torch.randn(2, 3, 15, 15) + features = self.get_features(images) + image_shapes = [i.shape[-2:] for i in images] + images = ImageList(images, image_shapes) + + model = self._init_test_anchor_generator() + model.eval() + anchors = model(images, features) + + # Estimate the number of target anchors + grid_sizes = [f.shape[-2:] for f in features] + num_anchors_estimated = 0 + for sizes, num_anchors_per_loc in zip(grid_sizes, model.num_anchors_per_location()): + num_anchors_estimated += sizes[0] * sizes[1] * num_anchors_per_loc + + anchors_output = torch.tensor([[-5., -5., 5., 5.], + [0., -5., 10., 5.], + [5., -5., 15., 5.], + [-5., 0., 5., 10.], + [0., 0., 10., 10.], + [5., 0., 15., 10.], + [-5., 5., 5., 15.], + [0., 5., 10., 15.], + [5., 5., 15., 15.]]) + + self.assertEqual(num_anchors_estimated, 9) + self.assertEqual(len(anchors), 2) + self.assertEqual(tuple(anchors[0].shape), (9, 4)) + self.assertEqual(tuple(anchors[1].shape), (9, 4)) + self.assertEqual(anchors[0], anchors_output) + self.assertEqual(anchors[1], anchors_output) diff --git a/test/test_models_detection_negative_samples.py b/test/test_models_detection_negative_samples.py new file mode 100644 index 00000000000..ad976a78b09 --- /dev/null +++ b/test/test_models_detection_negative_samples.py @@ -0,0 +1,144 @@ +import torch + +import torchvision.models +from torchvision.ops import MultiScaleRoIAlign +from torchvision.models.detection.rpn import AnchorGenerator, RPNHead, RegionProposalNetwork +from torchvision.models.detection.roi_heads import RoIHeads +from torchvision.models.detection.faster_rcnn import FastRCNNPredictor, TwoMLPHead + +import unittest + + +class Tester(unittest.TestCase): + + def _make_empty_sample(self, add_masks=False, add_keypoints=False): + images = [torch.rand((3, 100, 100), dtype=torch.float32)] + boxes = torch.zeros((0, 4), dtype=torch.float32) + negative_target = {"boxes": boxes, + "labels": torch.zeros(0, dtype=torch.int64), + "image_id": 4, + "area": (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]), + "iscrowd": torch.zeros((0,), dtype=torch.int64)} + + if add_masks: + negative_target["masks"] = torch.zeros(0, 100, 100, dtype=torch.uint8) + + if add_keypoints: + negative_target["keypoints"] = torch.zeros(17, 0, 3, dtype=torch.float32) + + targets = [negative_target] + return images, targets + + def test_targets_to_anchors(self): + _, targets = self._make_empty_sample() + anchors = [torch.randint(-50, 50, (3, 4), dtype=torch.float32)] + + anchor_sizes = ((32,), (64,), (128,), (256,), (512,)) + aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes) + rpn_anchor_generator = AnchorGenerator( + anchor_sizes, aspect_ratios + ) + rpn_head = RPNHead(4, rpn_anchor_generator.num_anchors_per_location()[0]) + + head = RegionProposalNetwork( + rpn_anchor_generator, rpn_head, + 0.5, 0.3, + 256, 0.5, + 2000, 2000, 0.7, 0.05) + + labels, matched_gt_boxes = head.assign_targets_to_anchors(anchors, targets) + + self.assertEqual(labels[0].sum(), 0) + self.assertEqual(labels[0].shape, torch.Size([anchors[0].shape[0]])) + self.assertEqual(labels[0].dtype, torch.float32) + + self.assertEqual(matched_gt_boxes[0].sum(), 0) + self.assertEqual(matched_gt_boxes[0].shape, anchors[0].shape) + self.assertEqual(matched_gt_boxes[0].dtype, torch.float32) + + def test_assign_targets_to_proposals(self): + + proposals = [torch.randint(-50, 50, (20, 4), dtype=torch.float32)] + gt_boxes = [torch.zeros((0, 4), dtype=torch.float32)] + gt_labels = [torch.tensor([[0]], dtype=torch.int64)] + + box_roi_pool = MultiScaleRoIAlign( + featmap_names=['0', '1', '2', '3'], + output_size=7, + sampling_ratio=2) + + resolution = box_roi_pool.output_size[0] + representation_size = 1024 + box_head = TwoMLPHead( + 4 * resolution ** 2, + representation_size) + + representation_size = 1024 + box_predictor = FastRCNNPredictor( + representation_size, + 2) + + roi_heads = RoIHeads( + # Box + box_roi_pool, box_head, box_predictor, + 0.5, 0.5, + 512, 0.25, + None, + 0.05, 0.5, 100) + + matched_idxs, labels = roi_heads.assign_targets_to_proposals(proposals, gt_boxes, gt_labels) + + self.assertEqual(matched_idxs[0].sum(), 0) + self.assertEqual(matched_idxs[0].shape, torch.Size([proposals[0].shape[0]])) + self.assertEqual(matched_idxs[0].dtype, torch.int64) + + self.assertEqual(labels[0].sum(), 0) + self.assertEqual(labels[0].shape, torch.Size([proposals[0].shape[0]])) + self.assertEqual(labels[0].dtype, torch.int64) + + def test_forward_negative_sample_frcnn(self): + for name in ["fasterrcnn_resnet50_fpn", "fasterrcnn_mobilenet_v3_large_fpn", + "fasterrcnn_mobilenet_v3_large_320_fpn"]: + model = torchvision.models.detection.__dict__[name]( + num_classes=2, min_size=100, max_size=100) + + images, targets = self._make_empty_sample() + loss_dict = model(images, targets) + + self.assertEqual(loss_dict["loss_box_reg"], torch.tensor(0.)) + self.assertEqual(loss_dict["loss_rpn_box_reg"], torch.tensor(0.)) + + def test_forward_negative_sample_mrcnn(self): + model = torchvision.models.detection.maskrcnn_resnet50_fpn( + num_classes=2, min_size=100, max_size=100) + + images, targets = self._make_empty_sample(add_masks=True) + loss_dict = model(images, targets) + + self.assertEqual(loss_dict["loss_box_reg"], torch.tensor(0.)) + self.assertEqual(loss_dict["loss_rpn_box_reg"], torch.tensor(0.)) + self.assertEqual(loss_dict["loss_mask"], torch.tensor(0.)) + + def test_forward_negative_sample_krcnn(self): + model = torchvision.models.detection.keypointrcnn_resnet50_fpn( + num_classes=2, min_size=100, max_size=100) + + images, targets = self._make_empty_sample(add_keypoints=True) + loss_dict = model(images, targets) + + self.assertEqual(loss_dict["loss_box_reg"], torch.tensor(0.)) + self.assertEqual(loss_dict["loss_rpn_box_reg"], torch.tensor(0.)) + self.assertEqual(loss_dict["loss_keypoint"], torch.tensor(0.)) + + def test_forward_negative_sample_retinanet(self): + model = torchvision.models.detection.retinanet_resnet50_fpn( + num_classes=2, min_size=100, max_size=100, pretrained_backbone=False) + + images, targets = self._make_empty_sample() + loss_dict = model(images, targets) + + self.assertEqual(loss_dict["bbox_regression"], torch.tensor(0.)) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_models_detection_utils.py b/test/test_models_detection_utils.py new file mode 100644 index 00000000000..f61d825e0d8 --- /dev/null +++ b/test/test_models_detection_utils.py @@ -0,0 +1,70 @@ +import copy +import torch +from torchvision.models.detection import _utils +from torchvision.models.detection.transform import GeneralizedRCNNTransform +import unittest +from torchvision.models.detection import backbone_utils + + +class Tester(unittest.TestCase): + def test_balanced_positive_negative_sampler(self): + sampler = _utils.BalancedPositiveNegativeSampler(4, 0.25) + # keep all 6 negatives first, then add 3 positives, last two are ignore + matched_idxs = [torch.tensor([0, 0, 0, 0, 0, 0, 1, 1, 1, -1, -1])] + pos, neg = sampler(matched_idxs) + # we know the number of elements that should be sampled for the positive (1) + # and the negative (3), and their location. Let's make sure that they are + # there + self.assertEqual(pos[0].sum(), 1) + self.assertEqual(pos[0][6:9].sum(), 1) + self.assertEqual(neg[0].sum(), 3) + self.assertEqual(neg[0][0:6].sum(), 3) + + def test_resnet_fpn_backbone_frozen_layers(self): + # we know how many initial layers and parameters of the network should + # be frozen for each trainable_backbone_layers parameter value + # i.e all 53 params are frozen if trainable_backbone_layers=0 + # ad first 24 params are frozen if trainable_backbone_layers=2 + expected_frozen_params = {0: 53, 1: 43, 2: 24, 3: 11, 4: 1, 5: 0} + for train_layers, exp_froz_params in expected_frozen_params.items(): + model = backbone_utils.resnet_fpn_backbone( + 'resnet50', pretrained=False, trainable_layers=train_layers) + # boolean list that is true if the param at that index is frozen + is_frozen = [not parameter.requires_grad for _, parameter in model.named_parameters()] + # check that expected initial number of layers are frozen + self.assertTrue(all(is_frozen[:exp_froz_params])) + + def test_validate_resnet_inputs_detection(self): + # default number of backbone layers to train + ret = backbone_utils._validate_trainable_layers( + pretrained=True, trainable_backbone_layers=None, max_value=5, default_value=3) + self.assertEqual(ret, 3) + # can't go beyond 5 + with self.assertRaises(AssertionError): + ret = backbone_utils._validate_trainable_layers( + pretrained=True, trainable_backbone_layers=6, max_value=5, default_value=3) + # if not pretrained, should use all trainable layers and warn + with self.assertWarns(UserWarning): + ret = backbone_utils._validate_trainable_layers( + pretrained=False, trainable_backbone_layers=0, max_value=5, default_value=3) + self.assertEqual(ret, 5) + + def test_transform_copy_targets(self): + transform = GeneralizedRCNNTransform(300, 500, torch.zeros(3), torch.ones(3)) + image = [torch.rand(3, 200, 300), torch.rand(3, 200, 200)] + targets = [{'boxes': torch.rand(3, 4)}, {'boxes': torch.rand(2, 4)}] + targets_copy = copy.deepcopy(targets) + out = transform(image, targets) # noqa: F841 + self.assertTrue(torch.equal(targets[0]['boxes'], targets_copy[0]['boxes'])) + self.assertTrue(torch.equal(targets[1]['boxes'], targets_copy[1]['boxes'])) + + def test_not_float_normalize(self): + transform = GeneralizedRCNNTransform(300, 500, torch.zeros(3), torch.ones(3)) + image = [torch.randint(0, 255, (3, 200, 300), dtype=torch.uint8)] + targets = [{'boxes': torch.rand(3, 4)}] + with self.assertRaises(TypeError): + out = transform(image, targets) # noqa: F841 + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_onnx.py b/test/test_onnx.py index 090f16cc550..63f182004b8 100644 --- a/test/test_onnx.py +++ b/test/test_onnx.py @@ -1,3 +1,12 @@ +# onnxruntime requires python 3.5 or above +try: + # This import should be before that of torch + # see https://github.com/onnx/onnx/issues/2394#issuecomment-581638840 + import onnxruntime +except ImportError: + onnxruntime = None + +from common_utils import set_rng_seed import io import torch from torchvision import ops @@ -12,12 +21,6 @@ from collections import OrderedDict -# onnxruntime requires python 3.5 or above -try: - import onnxruntime -except ImportError: - onnxruntime = None - import unittest from torchvision.ops._register_onnx_ops import _onnx_opset_version @@ -28,14 +31,19 @@ class ONNXExporterTester(unittest.TestCase): def setUpClass(cls): torch.manual_seed(123) - def run_model(self, model, inputs_list, tolerate_small_mismatch=False): + def run_model(self, model, inputs_list, tolerate_small_mismatch=False, do_constant_folding=True, dynamic_axes=None, + output_names=None, input_names=None): model.eval() onnx_io = io.BytesIO() + if isinstance(inputs_list[0][-1], dict): + torch_onnx_input = inputs_list[0] + ({},) + else: + torch_onnx_input = inputs_list[0] # export to onnx with the first input - torch.onnx.export(model, inputs_list[0], onnx_io, - do_constant_folding=True, opset_version=_onnx_opset_version) - + torch.onnx.export(model, torch_onnx_input, onnx_io, + do_constant_folding=do_constant_folding, opset_version=_onnx_opset_version, + dynamic_axes=dynamic_axes, input_names=input_names, output_names=output_names) # validate the exported model with onnx runtime for test_inputs in inputs_list: with torch.no_grad(): @@ -65,6 +73,7 @@ def to_numpy(tensor): # compute onnxruntime output prediction ort_inputs = dict((ort_session.get_inputs()[i].name, inpt) for i, inpt in enumerate(inputs)) ort_outs = ort_session.run(None, ort_inputs) + for i in range(0, len(outputs)): try: torch.testing.assert_allclose(outputs[i], ort_outs[i], rtol=1e-03, atol=1e-05) @@ -75,9 +84,10 @@ def to_numpy(tensor): raise def test_nms(self): - boxes = torch.rand(5, 4) - boxes[:, 2:] += torch.rand(5, 2) - scores = torch.randn(5) + num_boxes = 100 + boxes = torch.rand(num_boxes, 4) + boxes[:, 2:] += boxes[:, :2] + scores = torch.randn(num_boxes) class Module(torch.nn.Module): def forward(self, boxes, scores): @@ -85,12 +95,78 @@ def forward(self, boxes, scores): self.run_model(Module(), [(boxes, scores)]) + def test_batched_nms(self): + num_boxes = 100 + boxes = torch.rand(num_boxes, 4) + boxes[:, 2:] += boxes[:, :2] + scores = torch.randn(num_boxes) + idxs = torch.randint(0, 5, size=(num_boxes,)) + + class Module(torch.nn.Module): + def forward(self, boxes, scores, idxs): + return ops.batched_nms(boxes, scores, idxs, 0.5) + + self.run_model(Module(), [(boxes, scores, idxs)]) + + def test_clip_boxes_to_image(self): + boxes = torch.randn(5, 4) * 500 + boxes[:, 2:] += boxes[:, :2] + size = torch.randn(200, 300) + + size_2 = torch.randn(300, 400) + + class Module(torch.nn.Module): + def forward(self, boxes, size): + return ops.boxes.clip_boxes_to_image(boxes, size.shape) + + self.run_model(Module(), [(boxes, size), (boxes, size_2)], + input_names=["boxes", "size"], + dynamic_axes={"size": [0, 1]}) + def test_roi_align(self): x = torch.rand(1, 1, 10, 10, dtype=torch.float32) single_roi = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32) model = ops.RoIAlign((5, 5), 1, 2) self.run_model(model, [(x, single_roi)]) + x = torch.rand(1, 1, 10, 10, dtype=torch.float32) + single_roi = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32) + model = ops.RoIAlign((5, 5), 1, -1) + self.run_model(model, [(x, single_roi)]) + + def test_roi_align_aligned(self): + x = torch.rand(1, 1, 10, 10, dtype=torch.float32) + single_roi = torch.tensor([[0, 1.5, 1.5, 3, 3]], dtype=torch.float32) + model = ops.RoIAlign((5, 5), 1, 2, aligned=True) + self.run_model(model, [(x, single_roi)]) + + x = torch.rand(1, 1, 10, 10, dtype=torch.float32) + single_roi = torch.tensor([[0, 0.2, 0.3, 4.5, 3.5]], dtype=torch.float32) + model = ops.RoIAlign((5, 5), 0.5, 3, aligned=True) + self.run_model(model, [(x, single_roi)]) + + x = torch.rand(1, 1, 10, 10, dtype=torch.float32) + single_roi = torch.tensor([[0, 0.2, 0.3, 4.5, 3.5]], dtype=torch.float32) + model = ops.RoIAlign((5, 5), 1.8, 2, aligned=True) + self.run_model(model, [(x, single_roi)]) + + x = torch.rand(1, 1, 10, 10, dtype=torch.float32) + single_roi = torch.tensor([[0, 0.2, 0.3, 4.5, 3.5]], dtype=torch.float32) + model = ops.RoIAlign((2, 2), 2.5, 0, aligned=True) + self.run_model(model, [(x, single_roi)]) + + x = torch.rand(1, 1, 10, 10, dtype=torch.float32) + single_roi = torch.tensor([[0, 0.2, 0.3, 4.5, 3.5]], dtype=torch.float32) + model = ops.RoIAlign((2, 2), 2.5, -1, aligned=True) + self.run_model(model, [(x, single_roi)]) + + @unittest.skip # Issue in exporting ROIAlign with aligned = True for malformed boxes + def test_roi_align_malformed_boxes(self): + x = torch.randn(1, 1, 10, 10, dtype=torch.float32) + single_roi = torch.tensor([[0, 2, 0.3, 1.5, 1.5]], dtype=torch.float32) + model = ops.RoIAlign((5, 5), 1, 1, aligned=True) + self.run_model(model, [(x, single_roi)]) + def test_roi_pool(self): x = torch.rand(1, 1, 10, 10, dtype=torch.float32) rois = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32) @@ -99,6 +175,20 @@ def test_roi_pool(self): model = ops.RoIPool((pool_h, pool_w), 2) self.run_model(model, [(x, rois)]) + def test_resize_images(self): + class TransformModule(torch.nn.Module): + def __init__(self_module): + super(TransformModule, self_module).__init__() + self_module.transform = self._init_test_generalized_rcnn_transform() + + def forward(self_module, images): + return self_module.transform.resize(images, None)[0] + + input = torch.rand(3, 10, 20) + input_test = torch.rand(3, 100, 150) + self.run_model(TransformModule(), [(input,), (input_test,)], + input_names=["input1"], dynamic_axes={"input1": [0, 1, 2]}) + def test_transform_images(self): class TransformModule(torch.nn.Module): @@ -109,9 +199,9 @@ def __init__(self_module): def forward(self_module, images): return self_module.transform(images)[0].tensors - input = [torch.rand(3, 100, 200), torch.rand(3, 200, 200)] - input_test = [torch.rand(3, 100, 200), torch.rand(3, 200, 200)] - self.run_model(TransformModule(), [input, input_test]) + input = torch.rand(3, 100, 200), torch.rand(3, 200, 200) + input_test = torch.rand(3, 100, 200), torch.rand(3, 200, 200) + self.run_model(TransformModule(), [(input,), (input_test,)]) def _init_test_generalized_rcnn_transform(self): min_size = 100 @@ -134,12 +224,14 @@ def _init_test_rpn(self): rpn_pre_nms_top_n = dict(training=2000, testing=1000) rpn_post_nms_top_n = dict(training=2000, testing=1000) rpn_nms_thresh = 0.7 + rpn_score_thresh = 0.0 rpn = RegionProposalNetwork( rpn_anchor_generator, rpn_head, rpn_fg_iou_thresh, rpn_bg_iou_thresh, rpn_batch_size_per_image, rpn_positive_fraction, - rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh) + rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh, + score_thresh=rpn_score_thresh) return rpn def _init_test_roi_heads_faster_rcnn(self): @@ -192,23 +284,31 @@ def get_features(self, images): return features def test_rpn(self): + set_rng_seed(0) + class RPNModule(torch.nn.Module): - def __init__(self_module, images): + def __init__(self_module): super(RPNModule, self_module).__init__() self_module.rpn = self._init_test_rpn() - self_module.images = ImageList(images, [i.shape[-2:] for i in images]) - def forward(self_module, features): - return self_module.rpn(self_module.images, features) + def forward(self_module, images, features): + images = ImageList(images, [i.shape[-2:] for i in images]) + return self_module.rpn(images, features) - images = torch.rand(2, 3, 600, 600) + images = torch.rand(2, 3, 150, 150) features = self.get_features(images) - test_features = self.get_features(images) + images2 = torch.rand(2, 3, 80, 80) + test_features = self.get_features(images2) - model = RPNModule(images) + model = RPNModule() model.eval() - model(features) - self.run_model(model, [(features,), (test_features,)], tolerate_small_mismatch=True) + model(images, features) + + self.run_model(model, [(images, features), (images2, test_features)], tolerate_small_mismatch=True, + input_names=["input1", "input2", "input3", "input4", "input5", "input6"], + dynamic_axes={"input1": [0, 1, 2, 3], "input2": [0, 1, 2, 3], + "input3": [0, 1, 2, 3], "input4": [0, 1, 2, 3], + "input5": [0, 1, 2, 3], "input6": [0, 1, 2, 3]}) def test_multi_scale_roi_align(self): @@ -237,68 +337,88 @@ def forward(self, input, boxes): def test_roi_heads(self): class RoiHeadsModule(torch.nn.Module): - def __init__(self_module, images): + def __init__(self_module): super(RoiHeadsModule, self_module).__init__() self_module.transform = self._init_test_generalized_rcnn_transform() self_module.rpn = self._init_test_rpn() self_module.roi_heads = self._init_test_roi_heads_faster_rcnn() - self_module.original_image_sizes = [img.shape[-2:] for img in images] - self_module.images = ImageList(images, [i.shape[-2:] for i in images]) - def forward(self_module, features): - proposals, _ = self_module.rpn(self_module.images, features) - detections, _ = self_module.roi_heads(features, proposals, self_module.images.image_sizes) + def forward(self_module, images, features): + original_image_sizes = [img.shape[-2:] for img in images] + images = ImageList(images, [i.shape[-2:] for i in images]) + proposals, _ = self_module.rpn(images, features) + detections, _ = self_module.roi_heads(features, proposals, images.image_sizes) detections = self_module.transform.postprocess(detections, - self_module.images.image_sizes, - self_module.original_image_sizes) + images.image_sizes, + original_image_sizes) return detections - images = torch.rand(2, 3, 600, 600) + images = torch.rand(2, 3, 100, 100) features = self.get_features(images) - test_features = self.get_features(images) + images2 = torch.rand(2, 3, 150, 150) + test_features = self.get_features(images2) - model = RoiHeadsModule(images) + model = RoiHeadsModule() model.eval() - model(features) - self.run_model(model, [(features,), (test_features,)]) + model(images, features) - def get_image_from_url(self, url): + self.run_model(model, [(images, features), (images2, test_features)], tolerate_small_mismatch=True, + input_names=["input1", "input2", "input3", "input4", "input5", "input6"], + dynamic_axes={"input1": [0, 1, 2, 3], "input2": [0, 1, 2, 3], "input3": [0, 1, 2, 3], + "input4": [0, 1, 2, 3], "input5": [0, 1, 2, 3], "input6": [0, 1, 2, 3]}) + + def get_image_from_url(self, url, size=None): import requests - import numpy from PIL import Image from io import BytesIO from torchvision import transforms data = requests.get(url) image = Image.open(BytesIO(data.content)).convert("RGB") - image = image.resize((300, 200), Image.BILINEAR) + + if size is None: + size = (300, 200) + image = image.resize(size, Image.BILINEAR) to_tensor = transforms.ToTensor() return to_tensor(image) def get_test_images(self): image_url = "http://farm3.staticflickr.com/2469/3915380994_2e611b1779_z.jpg" - image = self.get_image_from_url(url=image_url) + image = self.get_image_from_url(url=image_url, size=(100, 320)) + image_url2 = "https://pytorch.org/tutorials/_static/img/tv_tutorial/tv_image05.png" - image2 = self.get_image_from_url(url=image_url2) + image2 = self.get_image_from_url(url=image_url2, size=(250, 380)) + images = [image] test_images = [image2] return images, test_images def test_faster_rcnn(self): images, test_images = self.get_test_images() - - model = models.detection.faster_rcnn.fasterrcnn_resnet50_fpn(pretrained=True, - min_size=200, - max_size=300) + dummy_image = [torch.ones(3, 100, 100) * 0.3] + model = models.detection.faster_rcnn.fasterrcnn_resnet50_fpn(pretrained=True, min_size=200, max_size=300) model.eval() model(images) - self.run_model(model, [(images,), (test_images,)]) + # Test exported model on images of different size, or dummy input + self.run_model(model, [(images,), (test_images,), (dummy_image,)], input_names=["images_tensors"], + output_names=["outputs"], + dynamic_axes={"images_tensors": [0, 1, 2], "outputs": [0, 1, 2]}, + tolerate_small_mismatch=True) + # Test exported model for an image with no detections on other images + self.run_model(model, [(dummy_image,), (images,)], input_names=["images_tensors"], + output_names=["outputs"], + dynamic_axes={"images_tensors": [0, 1, 2], "outputs": [0, 1, 2]}, + tolerate_small_mismatch=True) # Verify that paste_mask_in_image beahves the same in tracing. # This test also compares both paste_masks_in_image and _onnx_paste_masks_in_image # (since jit_trace witll call _onnx_paste_masks_in_image). def test_paste_mask_in_image(self): + # disable profiling + torch._C._jit_set_profiling_executor(False) + torch._C._jit_set_profiling_mode(False) + masks = torch.rand(10, 1, 26, 26) boxes = torch.rand(10, 4) boxes[:, 2:] += torch.rand(10, 2) @@ -325,14 +445,84 @@ def test_paste_mask_in_image(self): assert torch.all(out2.eq(out_trace2)) - @unittest.skip("Disable test until Resize opset 11 is implemented in ONNX Runtime") def test_mask_rcnn(self): images, test_images = self.get_test_images() - - model = models.detection.mask_rcnn.maskrcnn_resnet50_fpn(pretrained=True) + dummy_image = [torch.ones(3, 100, 100) * 0.3] + model = models.detection.mask_rcnn.maskrcnn_resnet50_fpn(pretrained=True, min_size=200, max_size=300) + model.eval() + model(images) + # Test exported model on images of different size, or dummy input + self.run_model(model, [(images,), (test_images,), (dummy_image,)], + input_names=["images_tensors"], + output_names=["boxes", "labels", "scores", "masks"], + dynamic_axes={"images_tensors": [0, 1, 2], "boxes": [0, 1], "labels": [0], + "scores": [0], "masks": [0, 1, 2]}, + tolerate_small_mismatch=True) + # TODO: enable this test once dynamic model export is fixed + # Test exported model for an image with no detections on other images + self.run_model(model, [(dummy_image,), (images,)], + input_names=["images_tensors"], + output_names=["boxes", "labels", "scores", "masks"], + dynamic_axes={"images_tensors": [0, 1, 2], "boxes": [0, 1], "labels": [0], + "scores": [0], "masks": [0, 1, 2]}, + tolerate_small_mismatch=True) + + # Verify that heatmaps_to_keypoints behaves the same in tracing. + # This test also compares both heatmaps_to_keypoints and _onnx_heatmaps_to_keypoints + # (since jit_trace witll call _heatmaps_to_keypoints). + # @unittest.skip("Disable test until Resize bug fixed in ORT") + def test_heatmaps_to_keypoints(self): + # disable profiling + torch._C._jit_set_profiling_executor(False) + torch._C._jit_set_profiling_mode(False) + + maps = torch.rand(10, 1, 26, 26) + rois = torch.rand(10, 4) + from torchvision.models.detection.roi_heads import heatmaps_to_keypoints + out = heatmaps_to_keypoints(maps, rois) + jit_trace = torch.jit.trace(heatmaps_to_keypoints, (maps, rois)) + out_trace = jit_trace(maps, rois) + + assert torch.all(out[0].eq(out_trace[0])) + assert torch.all(out[1].eq(out_trace[1])) + + maps2 = torch.rand(20, 2, 21, 21) + rois2 = torch.rand(20, 4) + from torchvision.models.detection.roi_heads import heatmaps_to_keypoints + out2 = heatmaps_to_keypoints(maps2, rois2) + out_trace2 = jit_trace(maps2, rois2) + + assert torch.all(out2[0].eq(out_trace2[0])) + assert torch.all(out2[1].eq(out_trace2[1])) + + def test_keypoint_rcnn(self): + images, test_images = self.get_test_images() + dummy_images = [torch.ones(3, 100, 100) * 0.3] + model = models.detection.keypoint_rcnn.keypointrcnn_resnet50_fpn(pretrained=True, min_size=200, max_size=300) model.eval() model(images) - self.run_model(model, [(images,), (test_images,)]) + self.run_model(model, [(images,), (test_images,), (dummy_images,)], + input_names=["images_tensors"], + output_names=["outputs1", "outputs2", "outputs3", "outputs4"], + dynamic_axes={"images_tensors": [0, 1, 2]}, + tolerate_small_mismatch=True) + + self.run_model(model, [(dummy_images,), (test_images,)], + input_names=["images_tensors"], + output_names=["outputs1", "outputs2", "outputs3", "outputs4"], + dynamic_axes={"images_tensors": [0, 1, 2]}, + tolerate_small_mismatch=True) + + def test_shufflenet_v2_dynamic_axes(self): + model = models.shufflenet_v2_x0_5(pretrained=True) + dummy_input = torch.randn(1, 3, 224, 224, requires_grad=True) + test_inputs = torch.cat([dummy_input, dummy_input, dummy_input], 0) + + self.run_model(model, [(dummy_input,), (test_inputs,)], + input_names=["input_images"], + output_names=["output"], + dynamic_axes={"input_images": {0: 'batch_size'}, "output": {0: 'batch_size'}}, + tolerate_small_mismatch=True) if __name__ == '__main__': diff --git a/test/test_ops.py b/test/test_ops.py index c4cc3fe0bd6..8c63c9c29c6 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1,15 +1,19 @@ -from __future__ import division +from common_utils import set_rng_seed +import math +import unittest + import numpy as np + import torch +from functools import lru_cache +from torch import Tensor from torch.autograd import gradcheck - +from torch.nn.modules.utils import _pair from torchvision import ops - -from itertools import product -import unittest +from typing import Tuple -class RoIOpTester(object): +class OpTester(object): @classmethod def setUpClass(cls): cls.dtype = torch.float64 @@ -43,24 +47,37 @@ def test_backward_cuda_non_contiguous(self): self._test_backward(device=torch.device('cuda'), contiguous=False) def _test_forward(self, device, contiguous): + pass + + def _test_backward(self, device, contiguous): + pass + + +class RoIOpTester(OpTester): + def _test_forward(self, device, contiguous, x_dtype=None, rois_dtype=None, **kwargs): + x_dtype = self.dtype if x_dtype is None else x_dtype + rois_dtype = self.dtype if rois_dtype is None else rois_dtype pool_size = 5 # n_channels % (pool_size ** 2) == 0 required for PS opeartions. n_channels = 2 * (pool_size ** 2) - x = torch.rand(2, n_channels, 10, 10, dtype=self.dtype, device=device) + x = torch.rand(2, n_channels, 10, 10, dtype=x_dtype, device=device) if not contiguous: x = x.permute(0, 1, 3, 2) rois = torch.tensor([[0, 0, 0, 9, 9], # format is (xyxy) [0, 0, 5, 4, 9], [0, 5, 5, 9, 9], [1, 0, 0, 9, 9]], - dtype=self.dtype, device=device) + dtype=rois_dtype, device=device) pool_h, pool_w = pool_size, pool_size - y = self.fn(x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1) + y = self.fn(x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs) + # the following should be true whether we're running an autocast test or not. + self.assertTrue(y.dtype == x.dtype) gt_y = self.expected_fn(x, rois, pool_h, pool_w, spatial_scale=1, - sampling_ratio=-1, device=device, dtype=self.dtype) + sampling_ratio=-1, device=device, dtype=self.dtype, **kwargs) - self.assertTrue(torch.allclose(gt_y, y)) + tol = 1e-3 if (x_dtype is torch.half or rois_dtype is torch.half) else 1e-5 + self.assertTrue(torch.allclose(gt_y.to(y.dtype), y, rtol=tol, atol=tol)) def _test_backward(self, device, contiguous): pool_size = 2 @@ -79,7 +96,22 @@ def func(z): self.assertTrue(gradcheck(func, (x,))) self.assertTrue(gradcheck(script_func, (x,))) - return + + def test_boxes_shape(self): + self._test_boxes_shape() + + def _helper_boxes_shape(self, func): + # test boxes as Tensor[N, 5] + with self.assertRaises(AssertionError): + a = torch.linspace(1, 8 * 8, 8 * 8).reshape(1, 1, 8, 8) + boxes = torch.tensor([[0, 0, 3, 3]], dtype=a.dtype) + func(a, boxes, output_size=(2, 2)) + + # test boxes as List[Tensor[N, 4]] + with self.assertRaises(AssertionError): + a = torch.linspace(1, 8 * 8, 8 * 8).reshape(1, 1, 8, 8) + boxes = torch.tensor([[0, 0, 3]], dtype=a.dtype) + ops.roi_pool(a, [boxes], output_size=(2, 2)) def fn(*args, **kwargs): pass @@ -90,17 +122,21 @@ def get_script_fn(*args, **kwargs): def expected_fn(*args, **kwargs): pass + @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable") + def test_autocast(self): + for x_dtype in (torch.float, torch.half): + for rois_dtype in (torch.float, torch.half): + with torch.cuda.amp.autocast(): + self._test_forward(torch.device("cuda"), contiguous=False, x_dtype=x_dtype, rois_dtype=rois_dtype) + class RoIPoolTester(RoIOpTester, unittest.TestCase): def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs): return ops.RoIPool((pool_h, pool_w), spatial_scale)(x, rois) def get_script_fn(self, rois, pool_size): - @torch.jit.script - def script_fn(input, rois, pool_size): - # type: (torch.Tensor, torch.Tensor, int) -> torch.Tensor - return ops.roi_pool(input, rois, pool_size, 1.0)[0] - return lambda x: script_fn(x, rois, pool_size) + scriped = torch.jit.script(ops.roi_pool) + return lambda x: scriped(x, rois, pool_size) def expected_fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, device=None, dtype=torch.float64): @@ -129,17 +165,17 @@ def get_slice(k, block): y[roi_idx, :, i, j] = bin_x.reshape(n_channels, -1).max(dim=1)[0] return y + def _test_boxes_shape(self): + self._helper_boxes_shape(ops.roi_pool) + class PSRoIPoolTester(RoIOpTester, unittest.TestCase): def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs): return ops.PSRoIPool((pool_h, pool_w), 1)(x, rois) def get_script_fn(self, rois, pool_size): - @torch.jit.script - def script_fn(input, rois, pool_size): - # type: (torch.Tensor, torch.Tensor, int) -> torch.Tensor - return ops.ps_roi_pool(input, rois, pool_size, 1.0)[0] - return lambda x: script_fn(x, rois, pool_size) + scriped = torch.jit.script(ops.ps_roi_pool) + return lambda x: scriped(x, rois, pool_size) def expected_fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, device=None, dtype=torch.float64): @@ -173,55 +209,63 @@ def get_slice(k, block): y[roi_idx, c_out, i, j] = t / area return y + def _test_boxes_shape(self): + self._helper_boxes_shape(ops.ps_roi_pool) -def bilinear_interpolate(data, height, width, y, x): - if y < -1.0 or y > height or x < -1.0 or x > width: - return 0. - y = min(max(0, y), height - 1) - x = min(max(0, x), width - 1) +def bilinear_interpolate(data, y, x, snap_border=False): + height, width = data.shape - y_low = int(y) - y_high = min(y_low + 1, height - 1) + if snap_border: + if -1 < y <= 0: + y = 0 + elif height - 1 <= y < height: + y = height - 1 - x_low = int(x) - x_high = min(x_low + 1, width - 1) + if -1 < x <= 0: + x = 0 + elif width - 1 <= x < width: + x = width - 1 - wy_h = y - y_low - wy_l = 1 - wy_h + y_low = int(math.floor(y)) + x_low = int(math.floor(x)) + y_high = y_low + 1 + x_high = x_low + 1 + wy_h = y - y_low wx_h = x - x_low + wy_l = 1 - wy_h wx_l = 1 - wx_h val = 0 - for wx, x in zip((wx_l, wx_h), (x_low, x_high)): - for wy, y in zip((wy_l, wy_h), (y_low, y_high)): - val += wx * wy * data[y * width + x] + for wx, xp in zip((wx_l, wx_h), (x_low, x_high)): + for wy, yp in zip((wy_l, wy_h), (y_low, y_high)): + if 0 <= yp < height and 0 <= xp < width: + val += wx * wy * data[yp, xp] return val class RoIAlignTester(RoIOpTester, unittest.TestCase): - def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs): + def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, aligned=False, **kwargs): return ops.RoIAlign((pool_h, pool_w), spatial_scale=spatial_scale, - sampling_ratio=sampling_ratio)(x, rois) + sampling_ratio=sampling_ratio, aligned=aligned)(x, rois) def get_script_fn(self, rois, pool_size): - @torch.jit.script - def script_fn(input, rois, pool_size): - # type: (torch.Tensor, torch.Tensor, int) -> torch.Tensor - return ops.roi_align(input, rois, pool_size, 1.0)[0] - return lambda x: script_fn(x, rois, pool_size) + scriped = torch.jit.script(ops.roi_align) + return lambda x: scriped(x, rois, pool_size) - def expected_fn(self, in_data, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, + def expected_fn(self, in_data, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, aligned=False, device=None, dtype=torch.float64): if device is None: device = torch.device("cpu") n_channels = in_data.size(1) out_data = torch.zeros(rois.size(0), n_channels, pool_h, pool_w, dtype=dtype, device=device) + offset = 0.5 if aligned else 0. + for r, roi in enumerate(rois): batch_idx = int(roi[0]) - j_begin, i_begin, j_end, i_end = (x.item() * spatial_scale for x in roi[1:]) + j_begin, i_begin, j_end, i_end = (x.item() * spatial_scale - offset for x in roi[1:]) roi_h = i_end - i_begin roi_w = j_end - j_begin @@ -242,17 +286,91 @@ def expected_fn(self, in_data, rois, pool_h, pool_w, spatial_scale=1, sampling_r y = start_h + (iy + 0.5) * bin_h / grid_h for ix in range(0, grid_w): x = start_w + (ix + 0.5) * bin_w / grid_w - val += bilinear_interpolate( - in_data[batch_idx, channel, :, :].flatten(), - in_data.size(-2), - in_data.size(-1), - y, x - ) + val += bilinear_interpolate(in_data[batch_idx, channel, :, :], y, x, snap_border=True) val /= grid_h * grid_w out_data[r, channel, i, j] = val return out_data + def _test_boxes_shape(self): + self._helper_boxes_shape(ops.roi_align) + + def _test_forward(self, device, contiguous, x_dtype=None, rois_dtype=None, **kwargs): + for aligned in (True, False): + super()._test_forward(device, contiguous, x_dtype, rois_dtype, aligned=aligned) + + def test_qroialign(self): + """Make sure quantized version of RoIAlign is close to float version""" + pool_size = 5 + img_size = 10 + n_channels = 2 + num_imgs = 1 + dtype = torch.float + + def make_rois(num_rois=1000): + rois = torch.randint(0, img_size // 2, size=(num_rois, 5)).to(dtype) + rois[:, 0] = torch.randint(0, num_imgs, size=(num_rois,)) # set batch index + rois[:, 3:] += rois[:, 1:3] # make sure boxes aren't degenerate + return rois + + for aligned in (True, False): + for scale, zero_point in ((1, 0), (2, 10), (0.1, 50)): + for qdtype in (torch.qint8, torch.quint8, torch.qint32): + + x = torch.randint(50, 100, size=(num_imgs, n_channels, img_size, img_size)).to(dtype) + qx = torch.quantize_per_tensor(x, scale=scale, zero_point=zero_point, dtype=qdtype) + + rois = make_rois() + qrois = torch.quantize_per_tensor(rois, scale=scale, zero_point=zero_point, dtype=qdtype) + + x, rois = qx.dequantize(), qrois.dequantize() # we want to pass the same inputs + + y = ops.roi_align( + x, + rois, + output_size=pool_size, + spatial_scale=1, + sampling_ratio=-1, + aligned=aligned, + ) + qy = ops.roi_align( + qx, + qrois, + output_size=pool_size, + spatial_scale=1, + sampling_ratio=-1, + aligned=aligned, + ) + + # The output qy is itself a quantized tensor and there might have been a loss of info when it was + # quantized. For a fair comparison we need to quantize y as well + quantized_float_y = torch.quantize_per_tensor(y, scale=scale, zero_point=zero_point, dtype=qdtype) + + try: + # Ideally, we would assert this, which passes with (scale, zero) == (1, 0) + self.assertTrue((qy == quantized_float_y).all()) + except AssertionError: + # But because the computation aren't exactly the same between the 2 RoIAlign procedures, some + # rounding error may lead to a difference of 2 in the output. + # For example with (scale, zero) = (2, 10), 45.00000... will be quantized to 44 + # but 45.00000001 will be rounded to 46. We make sure below that: + # - such discrepancies between qy and quantized_float_y are very rare (less then 5%) + # - any difference between qy and quantized_float_y is == scale + diff_idx = torch.where(qy != quantized_float_y) + num_diff = diff_idx[0].numel() + self.assertTrue(num_diff / qy.numel() < .05) + + abs_diff = torch.abs(qy[diff_idx].dequantize() - quantized_float_y[diff_idx].dequantize()) + t_scale = torch.full_like(abs_diff, fill_value=scale) + self.assertTrue(torch.allclose(abs_diff, t_scale, atol=1e-5)) + + x = torch.randint(50, 100, size=(2, 3, 10, 10)).to(dtype) + qx = torch.quantize_per_tensor(x, scale=1, zero_point=0, dtype=torch.qint8) + rois = make_rois(10) + qrois = torch.quantize_per_tensor(rois, scale=1, zero_point=0, dtype=torch.qint8) + with self.assertRaisesRegex(RuntimeError, "Only one image per batch is allowed"): + ops.roi_align(qx, qrois, output_size=pool_size) + class PSRoIAlignTester(RoIOpTester, unittest.TestCase): def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs): @@ -260,11 +378,8 @@ def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwar sampling_ratio=sampling_ratio)(x, rois) def get_script_fn(self, rois, pool_size): - @torch.jit.script - def script_fn(input, rois, pool_size): - # type: (torch.Tensor, torch.Tensor, int) -> torch.Tensor - return ops.ps_roi_align(input, rois, pool_size, 1.0)[0] - return lambda x: script_fn(x, rois, pool_size) + scriped = torch.jit.script(ops.ps_roi_align) + return lambda x: scriped(x, rois, pool_size) def expected_fn(self, in_data, rois, pool_h, pool_w, device, spatial_scale=1, sampling_ratio=-1, dtype=torch.float64): @@ -298,17 +413,29 @@ def expected_fn(self, in_data, rois, pool_h, pool_w, device, spatial_scale=1, y = start_h + (iy + 0.5) * bin_h / grid_h for ix in range(0, grid_w): x = start_w + (ix + 0.5) * bin_w / grid_w - val += bilinear_interpolate( - in_data[batch_idx, c_in, :, :].flatten(), - in_data.size(-2), - in_data.size(-1), - y, x - ) + val += bilinear_interpolate(in_data[batch_idx, c_in, :, :], y, x, snap_border=True) val /= grid_h * grid_w out_data[r, c_out, i, j] = val return out_data + def _test_boxes_shape(self): + self._helper_boxes_shape(ops.ps_roi_align) + + +class MultiScaleRoIAlignTester(unittest.TestCase): + def test_msroialign_repr(self): + fmap_names = ['0'] + output_size = (7, 7) + sampling_ratio = 2 + # Pass mock feature map names + t = ops.poolers.MultiScaleRoIAlign(fmap_names, output_size, sampling_ratio) + + # Check integrity of object __repr__ attribute + expected_string = (f"MultiScaleRoIAlign(featmap_names={fmap_names}, output_size={output_size}, " + f"sampling_ratio={sampling_ratio})") + self.assertEqual(t.__repr__(), expected_string) + class NMSTester(unittest.TestCase): def reference_nms(self, boxes, scores, iou_threshold): @@ -339,10 +466,14 @@ def _create_tensors_with_iou(self, N, iou_thresh): # let b0 be [x0, y0, x1, y1], and b1 be [x0, y0, x1 + d, y1], # then, in order to satisfy ops.iou(b0, b1) == iou_thresh, # we need to have d = (x1 - x0) * (1 - iou_thresh) / iou_thresh + # Adjust the threshold upward a bit with the intent of creating + # at least one box that exceeds (barely) the threshold and so + # should be suppressed. boxes = torch.rand(N, 4) * 100 boxes[:, 2:] += boxes[:, :2] boxes[-1, :] = boxes[0, :] x0, y0, x1, y1 = boxes[-1].tolist() + iou_thresh += 1e-5 boxes[-1, 2] += (x1 - x0) * (1 - iou_thresh) / iou_thresh scores = torch.rand(N) return boxes, scores @@ -354,9 +485,37 @@ def test_nms(self): keep_ref = self.reference_nms(boxes, scores, iou) keep = ops.nms(boxes, scores, iou) self.assertTrue(torch.allclose(keep, keep_ref), err_msg.format(iou)) + self.assertRaises(RuntimeError, ops.nms, torch.rand(4), torch.rand(3), 0.5) + self.assertRaises(RuntimeError, ops.nms, torch.rand(3, 5), torch.rand(3), 0.5) + self.assertRaises(RuntimeError, ops.nms, torch.rand(3, 4), torch.rand(3, 2), 0.5) + self.assertRaises(RuntimeError, ops.nms, torch.rand(3, 4), torch.rand(4), 0.5) + + def test_qnms(self): + # Note: we compare qnms vs nms instead of qnms vs reference implementation. + # This is because with the int convertion, the trick used in _create_tensors_with_iou + # doesn't really work (in fact, nms vs reference implem will also fail with ints) + err_msg = 'NMS and QNMS give different results for IoU={}' + for iou in [0.2, 0.5, 0.8]: + for scale, zero_point in ((1, 0), (2, 50), (3, 10)): + boxes, scores = self._create_tensors_with_iou(1000, iou) + scores *= 100 # otherwise most scores would be 0 or 1 after int convertion + + qboxes = torch.quantize_per_tensor(boxes, scale=scale, zero_point=zero_point, + dtype=torch.quint8) + qscores = torch.quantize_per_tensor(scores, scale=scale, zero_point=zero_point, + dtype=torch.quint8) + + boxes = qboxes.dequantize() + scores = qscores.dequantize() + + keep = ops.nms(boxes, scores, iou) + qkeep = ops.nms(qboxes, qscores, iou) + + self.assertTrue(torch.allclose(qkeep, keep), err_msg.format(iou)) @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable") - def test_nms_cuda(self): + def test_nms_cuda(self, dtype=torch.float64): + tol = 1e-3 if dtype is torch.half else 1e-5 err_msg = 'NMS incompatible between CPU and CUDA for IoU={}' for iou in [0.2, 0.5, 0.8]: @@ -364,7 +523,502 @@ def test_nms_cuda(self): r_cpu = ops.nms(boxes, scores, iou) r_cuda = ops.nms(boxes.cuda(), scores.cuda(), iou) - self.assertTrue(torch.allclose(r_cpu, r_cuda.cpu()), err_msg.format(iou)) + is_eq = torch.allclose(r_cpu, r_cuda.cpu()) + if not is_eq: + # if the indices are not the same, ensure that it's because the scores + # are duplicate + is_eq = torch.allclose(scores[r_cpu], scores[r_cuda.cpu()], rtol=tol, atol=tol) + self.assertTrue(is_eq, err_msg.format(iou)) + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable") + def test_autocast(self): + for dtype in (torch.float, torch.half): + with torch.cuda.amp.autocast(): + self.test_nms_cuda(dtype=dtype) + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable") + def test_nms_cuda_float16(self): + boxes = torch.tensor([[285.3538, 185.5758, 1193.5110, 851.4551], + [285.1472, 188.7374, 1192.4984, 851.0669], + [279.2440, 197.9812, 1189.4746, 849.2019]]).cuda() + scores = torch.tensor([0.6370, 0.7569, 0.3966]).cuda() + + iou_thres = 0.2 + keep32 = ops.nms(boxes, scores, iou_thres) + keep16 = ops.nms(boxes.to(torch.float16), scores.to(torch.float16), iou_thres) + self.assertTrue(torch.all(torch.eq(keep32, keep16))) + + def test_batched_nms_implementations(self): + """Make sure that both implementations of batched_nms yield identical results""" + + num_boxes = 1000 + iou_threshold = .9 + + boxes = torch.cat((torch.rand(num_boxes, 2), torch.rand(num_boxes, 2) + 10), dim=1) + assert max(boxes[:, 0]) < min(boxes[:, 2]) # x1 < x2 + assert max(boxes[:, 1]) < min(boxes[:, 3]) # y1 < y2 + + scores = torch.rand(num_boxes) + idxs = torch.randint(0, 4, size=(num_boxes,)) + keep_vanilla = ops.boxes._batched_nms_vanilla(boxes, scores, idxs, iou_threshold) + keep_trick = ops.boxes._batched_nms_coordinate_trick(boxes, scores, idxs, iou_threshold) + + err_msg = "The vanilla and the trick implementation yield different nms outputs." + self.assertTrue(torch.allclose(keep_vanilla, keep_trick), err_msg) + + # Also make sure an empty tensor is returned if boxes is empty + empty = torch.empty((0,), dtype=torch.int64) + self.assertTrue(torch.allclose(empty, ops.batched_nms(empty, None, None, None))) + + +class DeformConvTester(OpTester, unittest.TestCase): + def expected_fn(self, x, weight, offset, mask, bias, stride=1, padding=0, dilation=1): + stride_h, stride_w = _pair(stride) + pad_h, pad_w = _pair(padding) + dil_h, dil_w = _pair(dilation) + weight_h, weight_w = weight.shape[-2:] + + n_batches, n_in_channels, in_h, in_w = x.shape + n_out_channels = weight.shape[0] + + out_h = (in_h + 2 * pad_h - (dil_h * (weight_h - 1) + 1)) // stride_h + 1 + out_w = (in_w + 2 * pad_w - (dil_w * (weight_w - 1) + 1)) // stride_w + 1 + + n_offset_grps = offset.shape[1] // (2 * weight_h * weight_w) + in_c_per_offset_grp = n_in_channels // n_offset_grps + + n_weight_grps = n_in_channels // weight.shape[1] + in_c_per_weight_grp = weight.shape[1] + out_c_per_weight_grp = n_out_channels // n_weight_grps + + out = torch.zeros(n_batches, n_out_channels, out_h, out_w, device=x.device, dtype=x.dtype) + for b in range(n_batches): + for c_out in range(n_out_channels): + for i in range(out_h): + for j in range(out_w): + for di in range(weight_h): + for dj in range(weight_w): + for c in range(in_c_per_weight_grp): + weight_grp = c_out // out_c_per_weight_grp + c_in = weight_grp * in_c_per_weight_grp + c + + offset_grp = c_in // in_c_per_offset_grp + mask_idx = offset_grp * (weight_h * weight_w) + di * weight_w + dj + offset_idx = 2 * mask_idx + + pi = stride_h * i - pad_h + dil_h * di + offset[b, offset_idx, i, j] + pj = stride_w * j - pad_w + dil_w * dj + offset[b, offset_idx + 1, i, j] + + mask_value = 1.0 + if mask is not None: + mask_value = mask[b, mask_idx, i, j] + + out[b, c_out, i, j] += (mask_value * weight[c_out, c, di, dj] * + bilinear_interpolate(x[b, c_in, :, :], pi, pj)) + out += bias.view(1, n_out_channels, 1, 1) + return out + + @lru_cache(maxsize=None) + def get_fn_args(self, device, contiguous, batch_sz, dtype): + n_in_channels = 6 + n_out_channels = 2 + n_weight_grps = 2 + n_offset_grps = 3 + + stride = (2, 1) + pad = (1, 0) + dilation = (2, 1) + + stride_h, stride_w = stride + pad_h, pad_w = pad + dil_h, dil_w = dilation + weight_h, weight_w = (3, 2) + in_h, in_w = (5, 4) + + out_h = (in_h + 2 * pad_h - (dil_h * (weight_h - 1) + 1)) // stride_h + 1 + out_w = (in_w + 2 * pad_w - (dil_w * (weight_w - 1) + 1)) // stride_w + 1 + + x = torch.rand(batch_sz, n_in_channels, in_h, in_w, device=device, dtype=dtype, requires_grad=True) + + offset = torch.randn(batch_sz, n_offset_grps * 2 * weight_h * weight_w, out_h, out_w, + device=device, dtype=dtype, requires_grad=True) + + mask = torch.randn(batch_sz, n_offset_grps * weight_h * weight_w, out_h, out_w, + device=device, dtype=dtype, requires_grad=True) + + weight = torch.randn(n_out_channels, n_in_channels // n_weight_grps, weight_h, weight_w, + device=device, dtype=dtype, requires_grad=True) + + bias = torch.randn(n_out_channels, device=device, dtype=dtype, requires_grad=True) + + if not contiguous: + x = x.permute(0, 1, 3, 2).contiguous().permute(0, 1, 3, 2) + offset = offset.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1) + mask = mask.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1) + weight = weight.permute(3, 2, 0, 1).contiguous().permute(2, 3, 1, 0) + + return x, weight, offset, mask, bias, stride, pad, dilation + + def _test_forward(self, device, contiguous, dtype=None): + dtype = self.dtype if dtype is None else dtype + for batch_sz in [0, 33]: + self._test_forward_with_batchsize(device, contiguous, batch_sz, dtype) + + def _test_forward_with_batchsize(self, device, contiguous, batch_sz, dtype): + x, _, offset, mask, _, stride, padding, dilation = self.get_fn_args(device, contiguous, batch_sz, dtype) + in_channels = 6 + out_channels = 2 + kernel_size = (3, 2) + groups = 2 + tol = 2e-3 if dtype is torch.half else 1e-5 + + layer = ops.DeformConv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, + dilation=dilation, groups=groups).to(device=x.device, dtype=dtype) + res = layer(x, offset, mask) + + weight = layer.weight.data + bias = layer.bias.data + expected = self.expected_fn(x, weight, offset, mask, bias, stride=stride, padding=padding, dilation=dilation) + + self.assertTrue(torch.allclose(res.to(expected.dtype), expected, rtol=tol, atol=tol), + '\nres:\n{}\nexpected:\n{}'.format(res, expected)) + + # no modulation test + res = layer(x, offset) + expected = self.expected_fn(x, weight, offset, None, bias, stride=stride, padding=padding, dilation=dilation) + + self.assertTrue(torch.allclose(res.to(expected.dtype), expected, rtol=tol, atol=tol), + '\nres:\n{}\nexpected:\n{}'.format(res, expected)) + + # test for wrong sizes + with self.assertRaises(RuntimeError): + wrong_offset = torch.rand_like(offset[:, :2]) + res = layer(x, wrong_offset) + + with self.assertRaises(RuntimeError): + wrong_mask = torch.rand_like(mask[:, :2]) + res = layer(x, offset, wrong_mask) + + def _test_backward(self, device, contiguous): + for batch_sz in [0, 33]: + self._test_backward_with_batchsize(device, contiguous, batch_sz) + + def _test_backward_with_batchsize(self, device, contiguous, batch_sz): + x, weight, offset, mask, bias, stride, padding, dilation = self.get_fn_args(device, contiguous, + batch_sz, self.dtype) + + def func(x_, offset_, mask_, weight_, bias_): + return ops.deform_conv2d(x_, offset_, weight_, bias_, stride=stride, + padding=padding, dilation=dilation, mask=mask_) + + gradcheck(func, (x, offset, mask, weight, bias), nondet_tol=1e-5) + + def func_no_mask(x_, offset_, weight_, bias_): + return ops.deform_conv2d(x_, offset_, weight_, bias_, stride=stride, + padding=padding, dilation=dilation, mask=None) + + gradcheck(func_no_mask, (x, offset, weight, bias), nondet_tol=1e-5) + + @torch.jit.script + def script_func(x_, offset_, mask_, weight_, bias_, stride_, pad_, dilation_): + # type:(Tensor, Tensor, Tensor, Tensor, Tensor, Tuple[int, int], Tuple[int, int], Tuple[int, int])->Tensor + return ops.deform_conv2d(x_, offset_, weight_, bias_, stride=stride_, + padding=pad_, dilation=dilation_, mask=mask_) + + gradcheck(lambda z, off, msk, wei, bi: script_func(z, off, msk, wei, bi, stride, padding, dilation), + (x, offset, mask, weight, bias), nondet_tol=1e-5) + + @torch.jit.script + def script_func_no_mask(x_, offset_, weight_, bias_, stride_, pad_, dilation_): + # type:(Tensor, Tensor, Tensor, Tensor, Tuple[int, int], Tuple[int, int], Tuple[int, int])->Tensor + return ops.deform_conv2d(x_, offset_, weight_, bias_, stride=stride_, + padding=pad_, dilation=dilation_, mask=None) + + gradcheck(lambda z, off, wei, bi: script_func_no_mask(z, off, wei, bi, stride, padding, dilation), + (x, offset, weight, bias), nondet_tol=1e-5) + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable") + def test_compare_cpu_cuda_grads(self): + # Test from https://github.com/pytorch/vision/issues/2598 + # Run on CUDA only + for contiguous in [False, True]: + # compare grads computed on CUDA with grads computed on CPU + true_cpu_grads = None + + init_weight = torch.randn(9, 9, 3, 3, requires_grad=True) + img = torch.randn(8, 9, 1000, 110) + offset = torch.rand(8, 2 * 3 * 3, 1000, 110) + mask = torch.rand(8, 3 * 3, 1000, 110) + + if not contiguous: + img = img.permute(0, 1, 3, 2).contiguous().permute(0, 1, 3, 2) + offset = offset.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1) + mask = mask.permute(1, 3, 0, 2).contiguous().permute(2, 0, 3, 1) + weight = init_weight.permute(3, 2, 0, 1).contiguous().permute(2, 3, 1, 0) + else: + weight = init_weight + + for d in ["cpu", "cuda"]: + + out = ops.deform_conv2d(img.to(d), offset.to(d), weight.to(d), padding=1, mask=mask.to(d)) + out.mean().backward() + if true_cpu_grads is None: + true_cpu_grads = init_weight.grad + self.assertTrue(true_cpu_grads is not None) + else: + self.assertTrue(init_weight.grad is not None) + res_grads = init_weight.grad.to("cpu") + self.assertTrue(true_cpu_grads.allclose(res_grads)) + + @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable") + def test_autocast(self): + for dtype in (torch.float, torch.half): + with torch.cuda.amp.autocast(): + self._test_forward(torch.device("cuda"), False, dtype=dtype) + + +class FrozenBNTester(unittest.TestCase): + def test_frozenbatchnorm2d_repr(self): + num_features = 32 + eps = 1e-5 + t = ops.misc.FrozenBatchNorm2d(num_features, eps=eps) + + # Check integrity of object __repr__ attribute + expected_string = f"FrozenBatchNorm2d({num_features}, eps={eps})" + self.assertEqual(t.__repr__(), expected_string) + + def test_frozenbatchnorm2d_eps(self): + sample_size = (4, 32, 28, 28) + x = torch.rand(sample_size) + state_dict = dict(weight=torch.rand(sample_size[1]), + bias=torch.rand(sample_size[1]), + running_mean=torch.rand(sample_size[1]), + running_var=torch.rand(sample_size[1]), + num_batches_tracked=torch.tensor(100)) + + # Check that default eps is equal to the one of BN + fbn = ops.misc.FrozenBatchNorm2d(sample_size[1]) + fbn.load_state_dict(state_dict, strict=False) + bn = torch.nn.BatchNorm2d(sample_size[1]).eval() + bn.load_state_dict(state_dict) + # Difference is expected to fall in an acceptable range + self.assertTrue(torch.allclose(fbn(x), bn(x), atol=1e-6)) + + # Check computation for eps > 0 + fbn = ops.misc.FrozenBatchNorm2d(sample_size[1], eps=1e-5) + fbn.load_state_dict(state_dict, strict=False) + bn = torch.nn.BatchNorm2d(sample_size[1], eps=1e-5).eval() + bn.load_state_dict(state_dict) + self.assertTrue(torch.allclose(fbn(x), bn(x), atol=1e-6)) + + def test_frozenbatchnorm2d_n_arg(self): + """Ensure a warning is thrown when passing `n` kwarg + (remove this when support of `n` is dropped)""" + self.assertWarns(DeprecationWarning, ops.misc.FrozenBatchNorm2d, 32, eps=1e-5, n=32) + + +class BoxConversionTester(unittest.TestCase): + @staticmethod + def _get_box_sequences(): + # Define here the argument type of `boxes` supported by region pooling operations + box_tensor = torch.tensor([[0, 0, 0, 100, 100], [1, 0, 0, 100, 100]], dtype=torch.float) + box_list = [torch.tensor([[0, 0, 100, 100]], dtype=torch.float), + torch.tensor([[0, 0, 100, 100]], dtype=torch.float)] + box_tuple = tuple(box_list) + return box_tensor, box_list, box_tuple + + def test_check_roi_boxes_shape(self): + # Ensure common sequences of tensors are supported + for box_sequence in self._get_box_sequences(): + self.assertIsNone(ops._utils.check_roi_boxes_shape(box_sequence)) + + def test_convert_boxes_to_roi_format(self): + # Ensure common sequences of tensors yield the same result + ref_tensor = None + for box_sequence in self._get_box_sequences(): + if ref_tensor is None: + ref_tensor = box_sequence + else: + self.assertTrue(torch.equal(ref_tensor, ops._utils.convert_boxes_to_roi_format(box_sequence))) + + +class BoxTester(unittest.TestCase): + def test_bbox_same(self): + box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0], + [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float) + + exp_xyxy = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0], + [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float) + + box_same = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xyxy") + self.assertEqual(exp_xyxy.size(), torch.Size([4, 4])) + self.assertEqual(exp_xyxy.dtype, box_tensor.dtype) + assert torch.all(torch.eq(box_same, exp_xyxy)).item() + + box_same = ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="xywh") + self.assertEqual(exp_xyxy.size(), torch.Size([4, 4])) + self.assertEqual(exp_xyxy.dtype, box_tensor.dtype) + assert torch.all(torch.eq(box_same, exp_xyxy)).item() + + box_same = ops.box_convert(box_tensor, in_fmt="cxcywh", out_fmt="cxcywh") + self.assertEqual(exp_xyxy.size(), torch.Size([4, 4])) + self.assertEqual(exp_xyxy.dtype, box_tensor.dtype) + assert torch.all(torch.eq(box_same, exp_xyxy)).item() + + def test_bbox_xyxy_xywh(self): + # Simple test convert boxes to xywh and back. Make sure they are same. + # box_tensor is in x1 y1 x2 y2 format. + box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0], + [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float) + exp_xywh = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0], + [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float) + + box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh") + self.assertEqual(exp_xywh.size(), torch.Size([4, 4])) + self.assertEqual(exp_xywh.dtype, box_tensor.dtype) + assert torch.all(torch.eq(box_xywh, exp_xywh)).item() + + # Reverse conversion + box_xyxy = ops.box_convert(box_xywh, in_fmt="xywh", out_fmt="xyxy") + self.assertEqual(box_xyxy.size(), torch.Size([4, 4])) + self.assertEqual(box_xyxy.dtype, box_tensor.dtype) + assert torch.all(torch.eq(box_xyxy, box_tensor)).item() + + def test_bbox_xyxy_cxcywh(self): + # Simple test convert boxes to xywh and back. Make sure they are same. + # box_tensor is in x1 y1 x2 y2 format. + box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0], + [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float) + exp_cxcywh = torch.tensor([[50, 50, 100, 100], [0, 0, 0, 0], + [20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float) + + box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh") + self.assertEqual(exp_cxcywh.size(), torch.Size([4, 4])) + self.assertEqual(exp_cxcywh.dtype, box_tensor.dtype) + assert torch.all(torch.eq(box_cxcywh, exp_cxcywh)).item() + + # Reverse conversion + box_xyxy = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xyxy") + self.assertEqual(box_xyxy.size(), torch.Size([4, 4])) + self.assertEqual(box_xyxy.dtype, box_tensor.dtype) + assert torch.all(torch.eq(box_xyxy, box_tensor)).item() + + def test_bbox_xywh_cxcywh(self): + box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0], + [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float) + + # This is wrong + exp_cxcywh = torch.tensor([[50, 50, 100, 100], [0, 0, 0, 0], + [20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float) + + box_cxcywh = ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="cxcywh") + self.assertEqual(exp_cxcywh.size(), torch.Size([4, 4])) + self.assertEqual(exp_cxcywh.dtype, box_tensor.dtype) + assert torch.all(torch.eq(box_cxcywh, exp_cxcywh)).item() + + # Reverse conversion + box_xywh = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xywh") + self.assertEqual(box_xywh.size(), torch.Size([4, 4])) + self.assertEqual(box_xywh.dtype, box_tensor.dtype) + assert torch.all(torch.eq(box_xywh, box_tensor)).item() + + def test_bbox_invalid(self): + box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0], + [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float) + + invalid_infmts = ["xwyh", "cxwyh"] + invalid_outfmts = ["xwcx", "xhwcy"] + for inv_infmt in invalid_infmts: + for inv_outfmt in invalid_outfmts: + self.assertRaises(ValueError, ops.box_convert, box_tensor, inv_infmt, inv_outfmt) + + def test_bbox_convert_jit(self): + box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0], + [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float) + + scripted_fn = torch.jit.script(ops.box_convert) + TOLERANCE = 1e-3 + + box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh") + scripted_xywh = scripted_fn(box_tensor, 'xyxy', 'xywh') + self.assertTrue((scripted_xywh - box_xywh).abs().max() < TOLERANCE) + + box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh") + scripted_cxcywh = scripted_fn(box_tensor, 'xyxy', 'cxcywh') + self.assertTrue((scripted_cxcywh - box_cxcywh).abs().max() < TOLERANCE) + + +class BoxAreaTester(unittest.TestCase): + def test_box_area(self): + def area_check(box, expected, tolerance=1e-4): + out = ops.box_area(box) + assert out.size() == expected.size() + assert ((out - expected).abs().max() < tolerance).item() + + # Check for int boxes + for dtype in [torch.int8, torch.int16, torch.int32, torch.int64]: + box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0]], dtype=dtype) + expected = torch.tensor([10000, 0]) + area_check(box_tensor, expected) + + # Check for float32 and float64 boxes + for dtype in [torch.float32, torch.float64]: + box_tensor = torch.tensor([[285.3538, 185.5758, 1193.5110, 851.4551], + [285.1472, 188.7374, 1192.4984, 851.0669], + [279.2440, 197.9812, 1189.4746, 849.2019]], dtype=dtype) + expected = torch.tensor([604723.0806, 600965.4666, 592761.0085], dtype=torch.float64) + area_check(box_tensor, expected, tolerance=0.05) + + # Check for float16 box + box_tensor = torch.tensor([[285.25, 185.625, 1194.0, 851.5], + [285.25, 188.75, 1192.0, 851.0], + [279.25, 198.0, 1189.0, 849.0]], dtype=torch.float16) + expected = torch.tensor([605113.875, 600495.1875, 592247.25]) + area_check(box_tensor, expected) + + +class BoxIouTester(unittest.TestCase): + def test_iou(self): + def iou_check(box, expected, tolerance=1e-4): + out = ops.box_iou(box, box) + assert out.size() == expected.size() + assert ((out - expected).abs().max() < tolerance).item() + + # Check for int boxes + for dtype in [torch.int16, torch.int32, torch.int64]: + box = torch.tensor([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]], dtype=dtype) + expected = torch.tensor([[1.0, 0.25, 0.0], [0.25, 1.0, 0.0], [0.0, 0.0, 1.0]]) + iou_check(box, expected) + + # Check for float boxes + for dtype in [torch.float16, torch.float32, torch.float64]: + box_tensor = torch.tensor([[285.3538, 185.5758, 1193.5110, 851.4551], + [285.1472, 188.7374, 1192.4984, 851.0669], + [279.2440, 197.9812, 1189.4746, 849.2019]], dtype=dtype) + expected = torch.tensor([[1.0, 0.9933, 0.9673], [0.9933, 1.0, 0.9737], [0.9673, 0.9737, 1.0]]) + iou_check(box_tensor, expected, tolerance=0.002 if dtype == torch.float16 else 1e-4) + + +class GenBoxIouTester(unittest.TestCase): + def test_gen_iou(self): + def gen_iou_check(box, expected, tolerance=1e-4): + out = ops.generalized_box_iou(box, box) + assert out.size() == expected.size() + assert ((out - expected).abs().max() < tolerance).item() + + # Check for int boxes + for dtype in [torch.int16, torch.int32, torch.int64]: + box = torch.tensor([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]], dtype=dtype) + expected = torch.tensor([[1.0, 0.25, -0.7778], [0.25, 1.0, -0.8611], [-0.7778, -0.8611, 1.0]]) + gen_iou_check(box, expected) + + # Check for float boxes + for dtype in [torch.float16, torch.float32, torch.float64]: + box_tensor = torch.tensor([[285.3538, 185.5758, 1193.5110, 851.4551], + [285.1472, 188.7374, 1192.4984, 851.0669], + [279.2440, 197.9812, 1189.4746, 849.2019]], dtype=dtype) + expected = torch.tensor([[1.0, 0.9933, 0.9673], [0.9933, 1.0, 0.9737], [0.9673, 0.9737, 1.0]]) + gen_iou_check(box_tensor, expected, tolerance=0.002 if dtype == torch.float16 else 1e-3) if __name__ == '__main__': diff --git a/test/test_quantized_models.py b/test/test_quantized_models.py index f20cc369276..d8fd5325755 100644 --- a/test/test_quantized_models.py +++ b/test/test_quantized_models.py @@ -83,7 +83,10 @@ def do_test(self, model_name=model_name): input_shape = (1, 3, 299, 299) self._test_classification_model(model_name, input_shape) - setattr(ModelTester, "test_" + model_name, do_test) + # inception_v3 was causing timeouts on circleci + # See https://github.com/pytorch/vision/issues/1857 + if model_name not in ['inception_v3']: + setattr(ModelTester, "test_" + model_name, do_test) if __name__ == '__main__': diff --git a/test/test_transforms.py b/test/test_transforms.py index 1bbe1165f93..0a01247aa87 100644 --- a/test/test_transforms.py +++ b/test/test_transforms.py @@ -1,9 +1,11 @@ -from __future__ import division +import itertools import os import torch import torchvision.transforms as transforms import torchvision.transforms.functional as F +import torchvision.transforms.functional_tensor as F_t from torch._utils_internal import get_file_path_2 +from numpy.testing import assert_array_almost_equal import unittest import math import random @@ -19,13 +21,16 @@ except ImportError: stats = None +from common_utils import cycle_over, int_dtypes, float_dtypes + + GRACE_HOPPER = get_file_path_2( - os.path.dirname(os.path.abspath(__file__)), 'assets', 'grace_hopper_517x606.jpg') + os.path.dirname(os.path.abspath(__file__)), 'assets', 'encode_jpeg', 'grace_hopper_517x606.jpg') class Tester(unittest.TestCase): - def test_crop(self): + def test_center_crop(self): height = random.randint(10, 32) * 2 width = random.randint(10, 32) * 2 oheight = random.randint(5, (height - 2) / 2) * 2 @@ -66,6 +71,64 @@ def test_crop(self): self.assertGreater(sum2, sum1, "height: {} width: {} oheight: {} owdith: {}".format(height, width, oheight, owidth)) + def test_center_crop_2(self): + """ Tests when center crop size is larger than image size, along any dimension""" + even_image_size = (random.randint(10, 32) * 2, random.randint(10, 32) * 2) + odd_image_size = (even_image_size[0] + 1, even_image_size[1] + 1) + + # Since height is independent of width, we can ignore images with odd height and even width and vice-versa. + input_image_sizes = [even_image_size, odd_image_size] + + # Get different crop sizes + delta = random.choice((1, 3, 5)) + crop_size_delta = [-2 * delta, -delta, 0, delta, 2 * delta] + crop_size_params = itertools.product(input_image_sizes, crop_size_delta, crop_size_delta) + + for (input_image_size, delta_height, delta_width) in crop_size_params: + img = torch.ones(3, *input_image_size) + crop_size = (input_image_size[0] + delta_height, input_image_size[1] + delta_width) + + # Test both transforms, one with PIL input and one with tensor + output_pil = transforms.Compose([ + transforms.ToPILImage(), + transforms.CenterCrop(crop_size), + transforms.ToTensor()], + )(img) + self.assertEqual(output_pil.size()[1:3], crop_size, + "image_size: {} crop_size: {}".format(input_image_size, crop_size)) + + output_tensor = transforms.CenterCrop(crop_size)(img) + self.assertEqual(output_tensor.size()[1:3], crop_size, + "image_size: {} crop_size: {}".format(input_image_size, crop_size)) + + # Ensure output for PIL and Tensor are equal + self.assertEqual((output_tensor - output_pil).sum(), 0, + "image_size: {} crop_size: {}".format(input_image_size, crop_size)) + + # Check if content in center of both image and cropped output is same. + center_size = (min(crop_size[0], input_image_size[0]), min(crop_size[1], input_image_size[1])) + crop_center_tl, input_center_tl = [0, 0], [0, 0] + for index in range(2): + if crop_size[index] > input_image_size[index]: + crop_center_tl[index] = (crop_size[index] - input_image_size[index]) // 2 + else: + input_center_tl[index] = (input_image_size[index] - crop_size[index]) // 2 + + output_center = output_pil[ + :, + crop_center_tl[0]:crop_center_tl[0] + center_size[0], + crop_center_tl[1]:crop_center_tl[1] + center_size[1] + ] + + img_center = img[ + :, + input_center_tl[0]:input_center_tl[0] + center_size[0], + input_center_tl[1]:input_center_tl[1] + center_size[1] + ] + + self.assertEqual((output_center - img_center).sum(), 0, + "image_size: {} crop_size: {}".format(input_image_size, crop_size)) + def test_five_crop(self): to_pil_image = transforms.ToPILImage() h = random.randint(5, 25) @@ -175,49 +238,115 @@ def test_randomperspective(self): self.assertGreater(torch.nn.functional.mse_loss(tr_img, F.to_tensor(img)) + 0.3, torch.nn.functional.mse_loss(tr_img2, F.to_tensor(img))) - def test_resize(self): - height = random.randint(24, 32) * 2 - width = random.randint(24, 32) * 2 - osize = random.randint(5, 12) * 2 + def test_randomperspective_fill(self): + + # assert fill being either a Sequence or a Number + with self.assertRaises(TypeError): + transforms.RandomPerspective(fill={}) + + t = transforms.RandomPerspective(fill=None) + self.assertTrue(t.fill == 0) + height = 100 + width = 100 img = torch.ones(3, height, width) - result = transforms.Compose([ - transforms.ToPILImage(), - transforms.Resize(osize), - transforms.ToTensor(), - ])(img) - self.assertIn(osize, result.size()) - if height < width: - self.assertLessEqual(result.size(1), result.size(2)) - elif width < height: - self.assertGreaterEqual(result.size(1), result.size(2)) + to_pil_image = transforms.ToPILImage() + img = to_pil_image(img) - result = transforms.Compose([ - transforms.ToPILImage(), - transforms.Resize([osize, osize]), - transforms.ToTensor(), - ])(img) - self.assertIn(osize, result.size()) - self.assertEqual(result.size(1), osize) - self.assertEqual(result.size(2), osize) + modes = ("L", "RGB", "F") + nums_bands = [len(mode) for mode in modes] + fill = 127 - oheight = random.randint(5, 12) * 2 - owidth = random.randint(5, 12) * 2 - result = transforms.Compose([ - transforms.ToPILImage(), - transforms.Resize((oheight, owidth)), - transforms.ToTensor(), - ])(img) - self.assertEqual(result.size(1), oheight) - self.assertEqual(result.size(2), owidth) + for mode, num_bands in zip(modes, nums_bands): + img_conv = img.convert(mode) + perspective = transforms.RandomPerspective(p=1, fill=fill) + tr_img = perspective(img_conv) + pixel = tr_img.getpixel((0, 0)) - result = transforms.Compose([ - transforms.ToPILImage(), - transforms.Resize([oheight, owidth]), - transforms.ToTensor(), - ])(img) - self.assertEqual(result.size(1), oheight) - self.assertEqual(result.size(2), owidth) + if not isinstance(pixel, tuple): + pixel = (pixel,) + self.assertTupleEqual(pixel, tuple([fill] * num_bands)) + + for mode, num_bands in zip(modes, nums_bands): + img_conv = img.convert(mode) + startpoints, endpoints = transforms.RandomPerspective.get_params(width, height, 0.5) + tr_img = F.perspective(img_conv, startpoints, endpoints, fill=fill) + pixel = tr_img.getpixel((0, 0)) + + if not isinstance(pixel, tuple): + pixel = (pixel,) + self.assertTupleEqual(pixel, tuple([fill] * num_bands)) + + for wrong_num_bands in set(nums_bands) - {num_bands}: + with self.assertRaises(ValueError): + F.perspective(img_conv, startpoints, endpoints, fill=tuple([fill] * wrong_num_bands)) + + def test_resize(self): + + input_sizes = [ + # height, width + # square image + (28, 28), + (27, 27), + # rectangular image: h < w + (28, 34), + (29, 35), + # rectangular image: h > w + (34, 28), + (35, 29), + ] + test_output_sizes_1 = [ + # single integer + 22, 27, 28, 36, + # single integer in tuple/list + [22, ], (27, ), + ] + test_output_sizes_2 = [ + # two integers + [22, 22], [22, 28], [22, 36], + [27, 22], [36, 22], [28, 28], + [28, 37], [37, 27], [37, 37] + ] + + for height, width in input_sizes: + img = Image.new("RGB", size=(width, height), color=127) + + for osize in test_output_sizes_1: + for max_size in (None, 37, 1000): + + t = transforms.Resize(osize, max_size=max_size) + result = t(img) + + msg = "{}, {} - {} - {}".format(height, width, osize, max_size) + osize = osize[0] if isinstance(osize, (list, tuple)) else osize + # If size is an int, smaller edge of the image will be matched to this number. + # i.e, if height > width, then image will be rescaled to (size * height / width, size). + if height < width: + exp_w, exp_h = (int(osize * width / height), osize) # (w, h) + if max_size is not None and max_size < exp_w: + exp_w, exp_h = max_size, int(max_size * exp_h / exp_w) + self.assertEqual(result.size, (exp_w, exp_h), msg=msg) + elif width < height: + exp_w, exp_h = (osize, int(osize * height / width)) # (w, h) + if max_size is not None and max_size < exp_h: + exp_w, exp_h = int(max_size * exp_w / exp_h), max_size + self.assertEqual(result.size, (exp_w, exp_h), msg=msg) + else: + exp_w, exp_h = (osize, osize) # (w, h) + if max_size is not None and max_size < osize: + exp_w, exp_h = max_size, max_size + self.assertEqual(result.size, (exp_w, exp_h), msg=msg) + + for height, width in input_sizes: + img = Image.new("RGB", size=(width, height), color=127) + + for osize in test_output_sizes_2: + oheight, owidth = osize + + t = transforms.Resize(osize) + result = t(img) + + self.assertEqual((owidth, oheight), result.size) def test_random_crop(self): height = random.randint(10, 32) * 2 @@ -259,18 +388,32 @@ def test_random_crop(self): self.assertEqual(result.size(1), height + 1) self.assertEqual(result.size(2), width + 1) + t = transforms.RandomCrop(48) + img = torch.ones(3, 32, 32) + with self.assertRaisesRegex(ValueError, r"Required crop size .+ is larger then input image size .+"): + t(img) + def test_pad(self): height = random.randint(10, 32) * 2 width = random.randint(10, 32) * 2 img = torch.ones(3, height, width) padding = random.randint(1, 20) + fill = random.randint(1, 50) result = transforms.Compose([ transforms.ToPILImage(), - transforms.Pad(padding), + transforms.Pad(padding, fill=fill), transforms.ToTensor(), ])(img) self.assertEqual(result.size(1), height + 2 * padding) self.assertEqual(result.size(2), width + 2 * padding) + # check that all elements in the padded region correspond + # to the pad value + fill_v = fill / 255 + eps = 1e-5 + self.assertTrue((result[:, :padding, :] - fill_v).abs().max() < eps) + self.assertTrue((result[:, :, :padding] - fill_v).abs().max() < eps) + self.assertRaises(ValueError, transforms.Pad(padding, fill=(1, 2)), + transforms.ToPILImage()(img)) def test_pad_with_tuple_of_pad_values(self): height = random.randint(10, 32) * 2 @@ -320,6 +463,16 @@ def test_pad_with_non_constant_padding_modes(self): self.assertTrue(np.all(symmetric_middle_slice == np.asarray([0, 1, 200, 200, 1, 0]))) self.assertEqual(transforms.ToTensor()(symmetric_padded_img).size(), (3, 32, 34)) + # Check negative padding explicitly for symmetric case, since it is not + # implemented for tensor case to compare to + # Crop 1 to left, pad 2 to top, pad 3 to right, crop 3 to bottom + symmetric_padded_img_neg = F.pad(img, (-1, 2, 3, -3), padding_mode='symmetric') + symmetric_neg_middle_left = np.asarray(symmetric_padded_img_neg).transpose(2, 0, 1)[0][17][:3] + symmetric_neg_middle_right = np.asarray(symmetric_padded_img_neg).transpose(2, 0, 1)[0][17][-4:] + self.assertTrue(np.all(symmetric_neg_middle_left == np.asarray([1, 0, 0]))) + self.assertTrue(np.all(symmetric_neg_middle_right == np.asarray([200, 200, 0, 0]))) + self.assertEqual(transforms.ToTensor()(symmetric_padded_img_neg).size(), (3, 28, 31)) + def test_pad_raises_with_invalid_pad_sequence_len(self): with self.assertRaises(ValueError): transforms.Pad(()) @@ -330,6 +483,14 @@ def test_pad_raises_with_invalid_pad_sequence_len(self): with self.assertRaises(ValueError): transforms.Pad((1, 2, 3, 4, 5)) + def test_pad_with_mode_F_images(self): + pad = 2 + transform = transforms.Pad(pad) + + img = Image.new("F", (10, 10)) + padded_img = transform(img) + self.assertSequenceEqual(padded_img.size, [edge_size + 2 * pad for edge_size in img.size]) + def test_lambda(self): trans = transforms.Lambda(lambda x: x.add(10)) x = torch.randn(10) @@ -466,6 +627,148 @@ def test_to_tensor(self): output = trans(img) self.assertTrue(np.allclose(input_data.numpy(), output.numpy())) + def test_to_tensor_with_other_default_dtypes(self): + current_def_dtype = torch.get_default_dtype() + + t = transforms.ToTensor() + np_arr = np.random.randint(0, 255, (32, 32, 3), dtype=np.uint8) + img = Image.fromarray(np_arr) + + for dtype in [torch.float16, torch.float, torch.double]: + torch.set_default_dtype(dtype) + res = t(img) + self.assertTrue(res.dtype == dtype, msg=f"{res.dtype} vs {dtype}") + + torch.set_default_dtype(current_def_dtype) + + def test_max_value(self): + for dtype in int_dtypes(): + self.assertEqual(F_t._max_value(dtype), torch.iinfo(dtype).max) + + # remove float testing as it can lead to errors such as + # runtime error: 5.7896e+76 is outside the range of representable values of type 'float' + # for dtype in float_dtypes(): + # self.assertGreater(F_t._max_value(dtype), torch.finfo(dtype).max) + + def test_convert_image_dtype_float_to_float(self): + for input_dtype, output_dtypes in cycle_over(float_dtypes()): + input_image = torch.tensor((0.0, 1.0), dtype=input_dtype) + for output_dtype in output_dtypes: + with self.subTest(input_dtype=input_dtype, output_dtype=output_dtype): + transform = transforms.ConvertImageDtype(output_dtype) + transform_script = torch.jit.script(F.convert_image_dtype) + + output_image = transform(input_image) + output_image_script = transform_script(input_image, output_dtype) + + script_diff = output_image_script - output_image + self.assertLess(script_diff.abs().max(), 1e-6) + + actual_min, actual_max = output_image.tolist() + desired_min, desired_max = 0.0, 1.0 + + self.assertAlmostEqual(actual_min, desired_min) + self.assertAlmostEqual(actual_max, desired_max) + + def test_convert_image_dtype_float_to_int(self): + for input_dtype in float_dtypes(): + input_image = torch.tensor((0.0, 1.0), dtype=input_dtype) + for output_dtype in int_dtypes(): + with self.subTest(input_dtype=input_dtype, output_dtype=output_dtype): + transform = transforms.ConvertImageDtype(output_dtype) + transform_script = torch.jit.script(F.convert_image_dtype) + + if (input_dtype == torch.float32 and output_dtype in (torch.int32, torch.int64)) or ( + input_dtype == torch.float64 and output_dtype == torch.int64 + ): + with self.assertRaises(RuntimeError): + transform(input_image) + else: + output_image = transform(input_image) + output_image_script = transform_script(input_image, output_dtype) + + script_diff = output_image_script - output_image + self.assertLess(script_diff.abs().max(), 1e-6) + + actual_min, actual_max = output_image.tolist() + desired_min, desired_max = 0, torch.iinfo(output_dtype).max + + self.assertEqual(actual_min, desired_min) + self.assertEqual(actual_max, desired_max) + + def test_convert_image_dtype_int_to_float(self): + for input_dtype in int_dtypes(): + input_image = torch.tensor((0, torch.iinfo(input_dtype).max), dtype=input_dtype) + for output_dtype in float_dtypes(): + with self.subTest(input_dtype=input_dtype, output_dtype=output_dtype): + transform = transforms.ConvertImageDtype(output_dtype) + transform_script = torch.jit.script(F.convert_image_dtype) + + output_image = transform(input_image) + output_image_script = transform_script(input_image, output_dtype) + + script_diff = output_image_script - output_image + self.assertLess(script_diff.abs().max(), 1e-6) + + actual_min, actual_max = output_image.tolist() + desired_min, desired_max = 0.0, 1.0 + + self.assertAlmostEqual(actual_min, desired_min) + self.assertGreaterEqual(actual_min, desired_min) + self.assertAlmostEqual(actual_max, desired_max) + self.assertLessEqual(actual_max, desired_max) + + def test_convert_image_dtype_int_to_int(self): + for input_dtype, output_dtypes in cycle_over(int_dtypes()): + input_max = torch.iinfo(input_dtype).max + input_image = torch.tensor((0, input_max), dtype=input_dtype) + for output_dtype in output_dtypes: + output_max = torch.iinfo(output_dtype).max + + with self.subTest(input_dtype=input_dtype, output_dtype=output_dtype): + transform = transforms.ConvertImageDtype(output_dtype) + transform_script = torch.jit.script(F.convert_image_dtype) + + output_image = transform(input_image) + output_image_script = transform_script(input_image, output_dtype) + + script_diff = output_image_script.float() - output_image.float() + self.assertLess( + script_diff.abs().max(), 1e-6, msg="{} vs {}".format(output_image_script, output_image) + ) + + actual_min, actual_max = output_image.tolist() + desired_min, desired_max = 0, output_max + + # see https://github.com/pytorch/vision/pull/2078#issuecomment-641036236 for details + if input_max >= output_max: + error_term = 0 + else: + error_term = 1 - (torch.iinfo(output_dtype).max + 1) // (torch.iinfo(input_dtype).max + 1) + + self.assertEqual(actual_min, desired_min) + self.assertEqual(actual_max, desired_max + error_term) + + def test_convert_image_dtype_int_to_int_consistency(self): + for input_dtype, output_dtypes in cycle_over(int_dtypes()): + input_max = torch.iinfo(input_dtype).max + input_image = torch.tensor((0, input_max), dtype=input_dtype) + for output_dtype in output_dtypes: + output_max = torch.iinfo(output_dtype).max + if output_max <= input_max: + continue + + with self.subTest(input_dtype=input_dtype, output_dtype=output_dtype): + transform = transforms.ConvertImageDtype(output_dtype) + inverse_transfrom = transforms.ConvertImageDtype(input_dtype) + output_image = inverse_transfrom(transform(input_image)) + + actual_min, actual_max = output_image.tolist() + desired_min, desired_max = 0, input_max + + self.assertEqual(actual_min, desired_min) + self.assertEqual(actual_max, desired_max) + @unittest.skipIf(accimage is None, 'accimage not available') def test_accimage_to_tensor(self): trans = transforms.ToTensor() @@ -476,6 +779,49 @@ def test_accimage_to_tensor(self): self.assertEqual(expected_output.size(), output.size()) self.assertTrue(np.allclose(output.numpy(), expected_output.numpy())) + def test_pil_to_tensor(self): + test_channels = [1, 3, 4] + height, width = 4, 4 + trans = transforms.PILToTensor() + + with self.assertRaises(TypeError): + trans(np.random.rand(1, height, width).tolist()) + trans(np.random.rand(1, height, width)) + + for channels in test_channels: + input_data = torch.ByteTensor(channels, height, width).random_(0, 255) + img = transforms.ToPILImage()(input_data) + output = trans(img) + self.assertTrue(np.allclose(input_data.numpy(), output.numpy())) + + input_data = np.random.randint(low=0, high=255, size=(height, width, channels)).astype(np.uint8) + img = transforms.ToPILImage()(input_data) + output = trans(img) + expected_output = input_data.transpose((2, 0, 1)) + self.assertTrue(np.allclose(output.numpy(), expected_output)) + + input_data = torch.as_tensor(np.random.rand(channels, height, width).astype(np.float32)) + img = transforms.ToPILImage()(input_data) # CHW -> HWC and (* 255).byte() + output = trans(img) # HWC -> CHW + expected_output = (input_data * 255).byte() + self.assertTrue(np.allclose(output.numpy(), expected_output.numpy())) + + # separate test for mode '1' PIL images + input_data = torch.ByteTensor(1, height, width).bernoulli_() + img = transforms.ToPILImage()(input_data.mul(255)).convert('1') + output = trans(img) + self.assertTrue(np.allclose(input_data.numpy(), output.numpy())) + + @unittest.skipIf(accimage is None, 'accimage not available') + def test_accimage_pil_to_tensor(self): + trans = transforms.PILToTensor() + + expected_output = trans(Image.open(GRACE_HOPPER).convert('RGB')) + output = trans(accimage.Image(GRACE_HOPPER)) + + self.assertEqual(expected_output.size(), output.size()) + self.assertTrue(np.allclose(output.numpy(), expected_output.numpy())) + @unittest.skipIf(accimage is None, 'accimage not available') def test_accimage_resize(self): trans = transforms.Compose([ @@ -731,19 +1077,27 @@ def test_2d_ndarray_to_pil_image(self): self.assertTrue(np.allclose(img_data, img)) def test_tensor_bad_types_to_pil_image(self): - with self.assertRaises(ValueError): + with self.assertRaisesRegex(ValueError, r'pic should be 2/3 dimensional. Got \d+ dimensions.'): transforms.ToPILImage()(torch.ones(1, 3, 4, 4)) + with self.assertRaisesRegex(ValueError, r'pic should not have > 4 channels. Got \d+ channels.'): + transforms.ToPILImage()(torch.ones(6, 4, 4)) def test_ndarray_bad_types_to_pil_image(self): trans = transforms.ToPILImage() - with self.assertRaises(TypeError): + reg_msg = r'Input type \w+ is not supported' + with self.assertRaisesRegex(TypeError, reg_msg): trans(np.ones([4, 4, 1], np.int64)) + with self.assertRaisesRegex(TypeError, reg_msg): trans(np.ones([4, 4, 1], np.uint16)) + with self.assertRaisesRegex(TypeError, reg_msg): trans(np.ones([4, 4, 1], np.uint32)) + with self.assertRaisesRegex(TypeError, reg_msg): trans(np.ones([4, 4, 1], np.float64)) - with self.assertRaises(ValueError): + with self.assertRaisesRegex(ValueError, r'pic should be 2/3 dimensional. Got \d+ dimensions.'): transforms.ToPILImage()(np.ones([1, 4, 4, 3])) + with self.assertRaisesRegex(ValueError, r'pic should not have > 4 channels. Got \d+ channels.'): + transforms.ToPILImage()(np.ones([4, 4, 6])) @unittest.skipIf(stats is None, 'scipy.stats not available') def test_random_vertical_flip(self): @@ -842,6 +1196,24 @@ def test_normalize_different_dtype(self): # checks that it doesn't crash transforms.functional.normalize(img, mean, std) + def test_normalize_3d_tensor(self): + torch.manual_seed(28) + n_channels = 3 + img_size = 10 + mean = torch.rand(n_channels) + std = torch.rand(n_channels) + img = torch.rand(n_channels, img_size, img_size) + target = F.normalize(img, mean, std).numpy() + + mean_unsqueezed = mean.view(-1, 1, 1) + std_unsqueezed = std.view(-1, 1, 1) + result1 = F.normalize(img, mean_unsqueezed, std_unsqueezed) + result2 = F.normalize(img, + mean_unsqueezed.repeat(1, img_size, img_size), + std_unsqueezed.repeat(1, img_size, img_size)) + assert_array_almost_equal(target, result1.numpy()) + assert_array_almost_equal(target, result2.numpy()) + def test_adjust_brightness(self): x_shape = [2, 2, 3] x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] @@ -892,6 +1264,7 @@ def test_adjust_contrast(self): y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) self.assertTrue(np.allclose(y_np, y_ans)) + @unittest.skipIf(Image.__version__ >= '7', "Temporarily disabled") def test_adjust_saturation(self): x_shape = [2, 2, 3] x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] @@ -949,6 +1322,48 @@ def test_adjust_hue(self): y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) self.assertTrue(np.allclose(y_np, y_ans)) + def test_adjust_sharpness(self): + x_shape = [4, 4, 3] + x_data = [75, 121, 114, 105, 97, 107, 105, 32, 66, 111, 117, 114, 99, 104, 97, 0, + 0, 65, 108, 101, 120, 97, 110, 100, 101, 114, 32, 86, 114, 121, 110, 105, + 111, 116, 105, 115, 0, 0, 73, 32, 108, 111, 118, 101, 32, 121, 111, 117] + x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) + x_pil = Image.fromarray(x_np, mode='RGB') + + # test 0 + y_pil = F.adjust_sharpness(x_pil, 1) + y_np = np.array(y_pil) + self.assertTrue(np.allclose(y_np, x_np)) + + # test 1 + y_pil = F.adjust_sharpness(x_pil, 0.5) + y_np = np.array(y_pil) + y_ans = [75, 121, 114, 105, 97, 107, 105, 32, 66, 111, 117, 114, 99, 104, 97, 30, + 30, 74, 103, 96, 114, 97, 110, 100, 101, 114, 32, 81, 103, 108, 102, 101, + 107, 116, 105, 115, 0, 0, 73, 32, 108, 111, 118, 101, 32, 121, 111, 117] + y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) + self.assertTrue(np.allclose(y_np, y_ans)) + + # test 2 + y_pil = F.adjust_sharpness(x_pil, 2) + y_np = np.array(y_pil) + y_ans = [75, 121, 114, 105, 97, 107, 105, 32, 66, 111, 117, 114, 99, 104, 97, 0, + 0, 46, 118, 111, 132, 97, 110, 100, 101, 114, 32, 95, 135, 146, 126, 112, + 119, 116, 105, 115, 0, 0, 73, 32, 108, 111, 118, 101, 32, 121, 111, 117] + y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) + self.assertTrue(np.allclose(y_np, y_ans)) + + # test 3 + x_shape = [2, 2, 3] + x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] + x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape) + x_pil = Image.fromarray(x_np, mode='RGB') + x_th = torch.tensor(x_np.transpose(2, 0, 1)) + y_pil = F.adjust_sharpness(x_pil, 2) + y_np = np.array(y_pil).transpose(2, 0, 1) + y_th = F.adjust_sharpness(x_th, 2) + self.assertTrue(np.allclose(y_np, y_th.numpy())) + def test_adjust_gamma(self): x_shape = [2, 2, 3] x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1] @@ -963,14 +1378,14 @@ def test_adjust_gamma(self): # test 1 y_pil = F.adjust_gamma(x_pil, 0.5) y_np = np.array(y_pil) - y_ans = [0, 35, 57, 117, 185, 240, 97, 45, 244, 151, 255, 15] + y_ans = [0, 35, 57, 117, 186, 241, 97, 45, 245, 152, 255, 16] y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) self.assertTrue(np.allclose(y_np, y_ans)) # test 2 y_pil = F.adjust_gamma(x_pil, 2) y_np = np.array(y_pil) - y_ans = [0, 0, 0, 11, 71, 200, 5, 0, 214, 31, 255, 0] + y_ans = [0, 0, 0, 11, 71, 201, 5, 0, 215, 31, 255, 0] y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) self.assertTrue(np.allclose(y_np, y_ans)) @@ -985,6 +1400,7 @@ def test_adjusts_L_mode(self): self.assertEqual(F.adjust_saturation(x_l, 2).mode, 'L') self.assertEqual(F.adjust_contrast(x_l, 2).mode, 'L') self.assertEqual(F.adjust_hue(x_l, 0.4).mode, 'L') + self.assertEqual(F.adjust_sharpness(x_l, 2).mode, 'L') self.assertEqual(F.adjust_gamma(x_l, 0.5).mode, 'L') def test_color_jitter(self): @@ -1042,7 +1458,7 @@ def test_rotate(self): x = np.zeros((100, 100, 3), dtype=np.uint8) x[40, 40] = [255, 255, 255] - with self.assertRaises(TypeError): + with self.assertRaisesRegex(TypeError, r"img should be PIL Image"): F.rotate(x, 10) img = F.to_pil_image(x) @@ -1073,19 +1489,36 @@ def test_rotate(self): self.assertTrue(np.all(np.array(result_a) == np.array(result_b))) + def test_rotate_fill(self): + img = F.to_pil_image(np.ones((100, 100, 3), dtype=np.uint8) * 255, "RGB") + + modes = ("L", "RGB", "F") + nums_bands = [len(mode) for mode in modes] + fill = 127 + + for mode, num_bands in zip(modes, nums_bands): + img_conv = img.convert(mode) + img_rot = F.rotate(img_conv, 45.0, fill=fill) + pixel = img_rot.getpixel((0, 0)) + + if not isinstance(pixel, tuple): + pixel = (pixel,) + self.assertTupleEqual(pixel, tuple([fill] * num_bands)) + + for wrong_num_bands in set(nums_bands) - {num_bands}: + with self.assertRaises(ValueError): + F.rotate(img_conv, 45.0, fill=tuple([fill] * wrong_num_bands)) + def test_affine(self): input_img = np.zeros((40, 40, 3), dtype=np.uint8) - pts = [] cnt = [20, 20] for pt in [(16, 16), (20, 16), (20, 20)]: for i in range(-5, 5): for j in range(-5, 5): input_img[pt[0] + i, pt[1] + j, :] = [255, 155, 55] - pts.append((pt[0] + i, pt[1] + j)) - pts = list(set(pts)) - with self.assertRaises(TypeError): - F.affine(input_img, 10) + with self.assertRaises(TypeError, msg="Argument translate should be a sequence"): + F.affine(input_img, 10, translate=0, scale=1, shear=1) pil_img = F.to_pil_image(input_img) @@ -1137,9 +1570,12 @@ def _test_transformation(a, t, s, sh): inv_true_matrix = np.linalg.inv(true_matrix) for y in range(true_result.shape[0]): for x in range(true_result.shape[1]): - res = np.dot(inv_true_matrix, [x, y, 1]) - _x = int(res[0] + 0.5) - _y = int(res[1] + 0.5) + # Same as for PIL: + # https://github.com/python-pillow/Pillow/blob/71f8ec6a0cfc1008076a023c0756542539d057ab/ + # src/libImaging/Geometry.c#L1060 + input_pt = np.array([x + 0.5, y + 0.5, 1.0]) + res = np.floor(np.dot(inv_true_matrix, input_pt)).astype(np.int) + _x, _y = res[:2] if 0 <= _x < input_img.shape[1] and 0 <= _y < input_img.shape[0]: true_result[y, x, :] = input_img[_y, _x, :] @@ -1172,7 +1608,7 @@ def _test_transformation(a, t, s, sh): # Test rotation, scale, translation, shear for a in range(-90, 90, 25): for t1 in range(-10, 10, 5): - for s in [0.75, 0.98, 1.0, 1.1, 1.2]: + for s in [0.75, 0.98, 1.0, 1.2, 1.4]: for sh in range(-15, 15, 5): _test_transformation(a=a, t=(t1, t1), s=s, sh=(sh, sh)) @@ -1183,17 +1619,34 @@ def test_random_rotation(self): transforms.RandomRotation([-0.7]) transforms.RandomRotation([-0.7, 0, 0.7]) + # assert fill being either a Sequence or a Number + with self.assertRaises(TypeError): + transforms.RandomRotation(0, fill={}) + + t = transforms.RandomRotation(0, fill=None) + self.assertTrue(t.fill == 0) + t = transforms.RandomRotation(10) angle = t.get_params(t.degrees) self.assertTrue(angle > -10 and angle < 10) t = transforms.RandomRotation((-10, 10)) angle = t.get_params(t.degrees) - self.assertTrue(angle > -10 and angle < 10) + self.assertTrue(-10 < angle < 10) # Checking if RandomRotation can be printed as string t.__repr__() + # assert deprecation warning and non-BC + with self.assertWarnsRegex(UserWarning, r"Argument resample is deprecated and will be removed"): + t = transforms.RandomRotation((-10, 10), resample=2) + self.assertEqual(t.interpolation, transforms.InterpolationMode.BILINEAR) + + # assert changed type warning + with self.assertWarnsRegex(UserWarning, r"Argument interpolation should be of type InterpolationMode"): + t = transforms.RandomRotation((-10, 10), interpolation=2) + self.assertEqual(t.interpolation, transforms.InterpolationMode.BILINEAR) + def test_random_affine(self): with self.assertRaises(ValueError): @@ -1215,6 +1668,13 @@ def test_random_affine(self): transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, 0.5], shear=[-10, 0, 10]) transforms.RandomAffine([-90, 90], translate=[0.2, 0.2], scale=[0.5, 0.5], shear=[-10, 0, 10, 0, 10]) + # assert fill being either a Sequence or a Number + with self.assertRaises(TypeError): + transforms.RandomAffine(0, fill={}) + + t = transforms.RandomAffine(0, fill=None) + self.assertTrue(t.fill == 0) + x = np.zeros((100, 100, 3), dtype=np.uint8) img = F.to_pil_image(x) @@ -1234,8 +1694,22 @@ def test_random_affine(self): # Checking if RandomAffine can be printed as string t.__repr__() - t = transforms.RandomAffine(10, resample=Image.BILINEAR) - self.assertIn("Image.BILINEAR", t.__repr__()) + t = transforms.RandomAffine(10, interpolation=transforms.InterpolationMode.BILINEAR) + self.assertIn("bilinear", t.__repr__()) + + # assert deprecation warning and non-BC + with self.assertWarnsRegex(UserWarning, r"Argument resample is deprecated and will be removed"): + t = transforms.RandomAffine(10, resample=2) + self.assertEqual(t.interpolation, transforms.InterpolationMode.BILINEAR) + + with self.assertWarnsRegex(UserWarning, r"Argument fillcolor is deprecated and will be removed"): + t = transforms.RandomAffine(10, fillcolor=10) + self.assertEqual(t.fill, 10) + + # assert changed type warning + with self.assertWarnsRegex(UserWarning, r"Argument interpolation should be of type InterpolationMode"): + t = transforms.RandomAffine(10, interpolation=2) + self.assertEqual(t.interpolation, transforms.InterpolationMode.BILINEAR) def test_to_grayscale(self): """Unit tests for grayscale transform""" @@ -1380,40 +1854,152 @@ def test_random_grayscale(self): # Checking if RandomGrayscale can be printed as string trans3.__repr__() + def test_gaussian_blur_asserts(self): + np_img = np.ones((100, 100, 3), dtype=np.uint8) * 255 + img = F.to_pil_image(np_img, "RGB") + + with self.assertRaisesRegex(ValueError, r"If kernel_size is a sequence its length should be 2"): + F.gaussian_blur(img, [3]) + + with self.assertRaisesRegex(ValueError, r"If kernel_size is a sequence its length should be 2"): + F.gaussian_blur(img, [3, 3, 3]) + with self.assertRaisesRegex(ValueError, r"Kernel size should be a tuple/list of two integers"): + transforms.GaussianBlur([3, 3, 3]) + + with self.assertRaisesRegex(ValueError, r"kernel_size should have odd and positive integers"): + F.gaussian_blur(img, [4, 4]) + with self.assertRaisesRegex(ValueError, r"Kernel size value should be an odd and positive number"): + transforms.GaussianBlur([4, 4]) + + with self.assertRaisesRegex(ValueError, r"kernel_size should have odd and positive integers"): + F.gaussian_blur(img, [-3, -3]) + with self.assertRaisesRegex(ValueError, r"Kernel size value should be an odd and positive number"): + transforms.GaussianBlur([-3, -3]) + + with self.assertRaisesRegex(ValueError, r"If sigma is a sequence, its length should be 2"): + F.gaussian_blur(img, 3, [1, 1, 1]) + with self.assertRaisesRegex(ValueError, r"sigma should be a single number or a list/tuple with length 2"): + transforms.GaussianBlur(3, [1, 1, 1]) + + with self.assertRaisesRegex(ValueError, r"sigma should have positive values"): + F.gaussian_blur(img, 3, -1.0) + with self.assertRaisesRegex(ValueError, r"If sigma is a single number, it must be positive"): + transforms.GaussianBlur(3, -1.0) + + with self.assertRaisesRegex(TypeError, r"kernel_size should be int or a sequence of integers"): + F.gaussian_blur(img, "kernel_size_string") + with self.assertRaisesRegex(ValueError, r"Kernel size should be a tuple/list of two integers"): + transforms.GaussianBlur("kernel_size_string") + + with self.assertRaisesRegex(TypeError, r"sigma should be either float or sequence of floats"): + F.gaussian_blur(img, 3, "sigma_string") + with self.assertRaisesRegex(ValueError, r"sigma should be a single number or a list/tuple with length 2"): + transforms.GaussianBlur(3, "sigma_string") + + def _test_randomness(self, fn, trans, configs): + random_state = random.getstate() + random.seed(42) + img = transforms.ToPILImage()(torch.rand(3, 16, 18)) + + for p in [0.5, 0.7]: + for config in configs: + inv_img = fn(img, **config) + + num_samples = 250 + counts = 0 + for _ in range(num_samples): + tranformation = trans(p=p, **config) + tranformation.__repr__() + out = tranformation(img) + if out == inv_img: + counts += 1 + + p_value = stats.binom_test(counts, num_samples, p=p) + random.setstate(random_state) + self.assertGreater(p_value, 0.0001) + + @unittest.skipIf(stats is None, 'scipy.stats not available') + def test_random_invert(self): + self._test_randomness( + F.invert, + transforms.RandomInvert, + [{}] + ) + + @unittest.skipIf(stats is None, 'scipy.stats not available') + def test_random_posterize(self): + self._test_randomness( + F.posterize, + transforms.RandomPosterize, + [{"bits": 4}] + ) + + @unittest.skipIf(stats is None, 'scipy.stats not available') + def test_random_solarize(self): + self._test_randomness( + F.solarize, + transforms.RandomSolarize, + [{"threshold": 192}] + ) + + @unittest.skipIf(stats is None, 'scipy.stats not available') + def test_random_adjust_sharpness(self): + self._test_randomness( + F.adjust_sharpness, + transforms.RandomAdjustSharpness, + [{"sharpness_factor": 2.0}] + ) + + @unittest.skipIf(stats is None, 'scipy.stats not available') + def test_random_autocontrast(self): + self._test_randomness( + F.autocontrast, + transforms.RandomAutocontrast, + [{}] + ) + + @unittest.skipIf(stats is None, 'scipy.stats not available') + def test_random_equalize(self): + self._test_randomness( + F.equalize, + transforms.RandomEqualize, + [{}] + ) + + def test_autoaugment(self): + for policy in transforms.AutoAugmentPolicy: + for fill in [None, 85, (128, 128, 128)]: + random.seed(42) + img = Image.open(GRACE_HOPPER) + transform = transforms.AutoAugment(policy=policy, fill=fill) + for _ in range(100): + img = transform(img) + transform.__repr__() + + @unittest.skipIf(stats is None, 'scipy.stats not available') def test_random_erasing(self): - """Unit tests for random erasing transform""" - - img = torch.rand([3, 60, 60]) - - # Test Set 1: Erasing with int value - img_re = transforms.RandomErasing(value=0.2) - i, j, h, w, v = img_re.get_params(img, scale=img_re.scale, ratio=img_re.ratio, value=img_re.value) - img_output = F.erase(img, i, j, h, w, v) - self.assertEqual(img_output.size(0), 3) - - # Test Set 2: Check if the unerased region is preserved - orig_unerased = img.clone() - orig_unerased[:, i:i + h, j:j + w] = 0 - output_unerased = img_output.clone() - output_unerased[:, i:i + h, j:j + w] = 0 - self.assertTrue(torch.equal(orig_unerased, output_unerased)) - - # Test Set 3: Erasing with random value - img_re = transforms.RandomErasing(value='random')(img) - self.assertEqual(img_re.size(0), 3) - - # Test Set 4: Erasing with tuple value - img_re = transforms.RandomErasing(value=(0.2, 0.2, 0.2))(img) - self.assertEqual(img_re.size(0), 3) - - # Test Set 5: Testing the inplace behaviour - img_re = transforms.RandomErasing(value=(0.2), inplace=True)(img) - self.assertTrue(torch.equal(img_re, img)) - - # Test Set 6: Checking when no erased region is selected - img = torch.rand([3, 300, 1]) - img_re = transforms.RandomErasing(ratio=(0.1, 0.2), value='random')(img) - self.assertTrue(torch.equal(img_re, img)) + img = torch.ones(3, 128, 128) + + t = transforms.RandomErasing(scale=(0.1, 0.1), ratio=(1 / 3, 3.)) + y, x, h, w, v = t.get_params(img, t.scale, t.ratio, [t.value, ]) + aspect_ratio = h / w + # Add some tolerance due to the rounding and int conversion used in the transform + tol = 0.05 + self.assertTrue(1 / 3 - tol <= aspect_ratio <= 3 + tol) + + aspect_ratios = [] + random.seed(42) + trial = 1000 + for _ in range(trial): + y, x, h, w, v = t.get_params(img, t.scale, t.ratio, [t.value, ]) + aspect_ratios.append(h / w) + + count_bigger_then_ones = len([1 for aspect_ratio in aspect_ratios if aspect_ratio > 1]) + p_value = stats.binom_test(count_bigger_then_ones, trial, p=0.5) + self.assertGreater(p_value, 0.0001) + + # Checking if RandomErasing can be printed as string + t.__repr__() if __name__ == '__main__': diff --git a/test/test_transforms_tensor.py b/test/test_transforms_tensor.py new file mode 100644 index 00000000000..1bd0099af63 --- /dev/null +++ b/test/test_transforms_tensor.py @@ -0,0 +1,655 @@ +import os +import torch +from torchvision import transforms as T +from torchvision.transforms import functional as F +from torchvision.transforms import InterpolationMode + +import numpy as np + +import unittest +from typing import Sequence + +from common_utils import TransformsTester, get_tmp_dir, int_dtypes, float_dtypes + + +NEAREST, BILINEAR, BICUBIC = InterpolationMode.NEAREST, InterpolationMode.BILINEAR, InterpolationMode.BICUBIC + + +class Tester(TransformsTester): + + def setUp(self): + self.device = "cpu" + + def _test_functional_op(self, func, fn_kwargs): + if fn_kwargs is None: + fn_kwargs = {} + + f = getattr(F, func) + tensor, pil_img = self._create_data(height=10, width=10, device=self.device) + transformed_tensor = f(tensor, **fn_kwargs) + transformed_pil_img = f(pil_img, **fn_kwargs) + self.compareTensorToPIL(transformed_tensor, transformed_pil_img) + + def _test_transform_vs_scripted(self, transform, s_transform, tensor, msg=None): + torch.manual_seed(12) + out1 = transform(tensor) + torch.manual_seed(12) + out2 = s_transform(tensor) + self.assertTrue(out1.equal(out2), msg=msg) + + def _test_transform_vs_scripted_on_batch(self, transform, s_transform, batch_tensors, msg=None): + torch.manual_seed(12) + transformed_batch = transform(batch_tensors) + + for i in range(len(batch_tensors)): + img_tensor = batch_tensors[i, ...] + torch.manual_seed(12) + transformed_img = transform(img_tensor) + self.assertTrue(transformed_img.equal(transformed_batch[i, ...]), msg=msg) + + torch.manual_seed(12) + s_transformed_batch = s_transform(batch_tensors) + self.assertTrue(transformed_batch.equal(s_transformed_batch), msg=msg) + + def _test_class_op(self, method, meth_kwargs=None, test_exact_match=True, **match_kwargs): + if meth_kwargs is None: + meth_kwargs = {} + + # test for class interface + f = getattr(T, method)(**meth_kwargs) + scripted_fn = torch.jit.script(f) + + tensor, pil_img = self._create_data(26, 34, device=self.device) + # set seed to reproduce the same transformation for tensor and PIL image + torch.manual_seed(12) + transformed_tensor = f(tensor) + torch.manual_seed(12) + transformed_pil_img = f(pil_img) + if test_exact_match: + self.compareTensorToPIL(transformed_tensor, transformed_pil_img, **match_kwargs) + else: + self.approxEqualTensorToPIL(transformed_tensor.float(), transformed_pil_img, **match_kwargs) + + torch.manual_seed(12) + transformed_tensor_script = scripted_fn(tensor) + self.assertTrue(transformed_tensor.equal(transformed_tensor_script)) + + batch_tensors = self._create_data_batch(height=23, width=34, channels=3, num_samples=4, device=self.device) + self._test_transform_vs_scripted_on_batch(f, scripted_fn, batch_tensors) + + with get_tmp_dir() as tmp_dir: + scripted_fn.save(os.path.join(tmp_dir, "t_{}.pt".format(method))) + + def _test_op(self, func, method, fn_kwargs=None, meth_kwargs=None): + self._test_functional_op(func, fn_kwargs) + self._test_class_op(method, meth_kwargs) + + def test_random_horizontal_flip(self): + self._test_op('hflip', 'RandomHorizontalFlip') + + def test_random_vertical_flip(self): + self._test_op('vflip', 'RandomVerticalFlip') + + def test_random_invert(self): + self._test_op('invert', 'RandomInvert') + + def test_random_posterize(self): + fn_kwargs = meth_kwargs = {"bits": 4} + self._test_op( + 'posterize', 'RandomPosterize', fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + + def test_random_solarize(self): + fn_kwargs = meth_kwargs = {"threshold": 192.0} + self._test_op( + 'solarize', 'RandomSolarize', fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + + def test_random_adjust_sharpness(self): + fn_kwargs = meth_kwargs = {"sharpness_factor": 2.0} + self._test_op( + 'adjust_sharpness', 'RandomAdjustSharpness', fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + + def test_random_autocontrast(self): + self._test_op('autocontrast', 'RandomAutocontrast') + + def test_random_equalize(self): + self._test_op('equalize', 'RandomEqualize') + + def test_color_jitter(self): + + tol = 1.0 + 1e-10 + for f in [0.1, 0.5, 1.0, 1.34, (0.3, 0.7), [0.4, 0.5]]: + meth_kwargs = {"brightness": f} + self._test_class_op( + "ColorJitter", meth_kwargs=meth_kwargs, test_exact_match=False, tol=tol, agg_method="max" + ) + + for f in [0.2, 0.5, 1.0, 1.5, (0.3, 0.7), [0.4, 0.5]]: + meth_kwargs = {"contrast": f} + self._test_class_op( + "ColorJitter", meth_kwargs=meth_kwargs, test_exact_match=False, tol=tol, agg_method="max" + ) + + for f in [0.5, 0.75, 1.0, 1.25, (0.3, 0.7), [0.3, 0.4]]: + meth_kwargs = {"saturation": f} + self._test_class_op( + "ColorJitter", meth_kwargs=meth_kwargs, test_exact_match=False, tol=tol, agg_method="max" + ) + + for f in [0.2, 0.5, (-0.2, 0.3), [-0.4, 0.5]]: + meth_kwargs = {"hue": f} + self._test_class_op( + "ColorJitter", meth_kwargs=meth_kwargs, test_exact_match=False, tol=16.1, agg_method="max" + ) + + # All 4 parameters together + meth_kwargs = {"brightness": 0.2, "contrast": 0.2, "saturation": 0.2, "hue": 0.2} + self._test_class_op( + "ColorJitter", meth_kwargs=meth_kwargs, test_exact_match=False, tol=12.1, agg_method="max" + ) + + def test_pad(self): + for m in ["constant", "edge", "reflect", "symmetric"]: + fill = 127 if m == "constant" else 0 + for mul in [1, -1]: + # Test functional.pad (PIL and Tensor) with padding as single int + self._test_functional_op( + "pad", fn_kwargs={"padding": mul * 2, "fill": fill, "padding_mode": m} + ) + # Test functional.pad and transforms.Pad with padding as [int, ] + fn_kwargs = meth_kwargs = {"padding": [mul * 2, ], "fill": fill, "padding_mode": m} + self._test_op( + "pad", "Pad", fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + # Test functional.pad and transforms.Pad with padding as list + fn_kwargs = meth_kwargs = {"padding": [mul * 4, 4], "fill": fill, "padding_mode": m} + self._test_op( + "pad", "Pad", fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + # Test functional.pad and transforms.Pad with padding as tuple + fn_kwargs = meth_kwargs = {"padding": (mul * 2, 2, 2, mul * 2), "fill": fill, "padding_mode": m} + self._test_op( + "pad", "Pad", fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + + def test_crop(self): + fn_kwargs = {"top": 2, "left": 3, "height": 4, "width": 5} + # Test transforms.RandomCrop with size and padding as tuple + meth_kwargs = {"size": (4, 5), "padding": (4, 4), "pad_if_needed": True, } + self._test_op( + 'crop', 'RandomCrop', fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + + sizes = [5, [5, ], [6, 6]] + padding_configs = [ + {"padding_mode": "constant", "fill": 0}, + {"padding_mode": "constant", "fill": 10}, + {"padding_mode": "constant", "fill": 20}, + {"padding_mode": "edge"}, + {"padding_mode": "reflect"}, + ] + + for size in sizes: + for padding_config in padding_configs: + config = dict(padding_config) + config["size"] = size + self._test_class_op("RandomCrop", config) + + def test_center_crop(self): + fn_kwargs = {"output_size": (4, 5)} + meth_kwargs = {"size": (4, 5), } + self._test_op( + "center_crop", "CenterCrop", fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + fn_kwargs = {"output_size": (5,)} + meth_kwargs = {"size": (5, )} + self._test_op( + "center_crop", "CenterCrop", fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + tensor = torch.randint(0, 256, (3, 10, 10), dtype=torch.uint8, device=self.device) + # Test torchscript of transforms.CenterCrop with size as int + f = T.CenterCrop(size=5) + scripted_fn = torch.jit.script(f) + scripted_fn(tensor) + + # Test torchscript of transforms.CenterCrop with size as [int, ] + f = T.CenterCrop(size=[5, ]) + scripted_fn = torch.jit.script(f) + scripted_fn(tensor) + + # Test torchscript of transforms.CenterCrop with size as tuple + f = T.CenterCrop(size=(6, 6)) + scripted_fn = torch.jit.script(f) + scripted_fn(tensor) + + with get_tmp_dir() as tmp_dir: + scripted_fn.save(os.path.join(tmp_dir, "t_center_crop.pt")) + + def _test_op_list_output(self, func, method, out_length, fn_kwargs=None, meth_kwargs=None): + if fn_kwargs is None: + fn_kwargs = {} + if meth_kwargs is None: + meth_kwargs = {} + + fn = getattr(F, func) + scripted_fn = torch.jit.script(fn) + + tensor, pil_img = self._create_data(height=20, width=20, device=self.device) + transformed_t_list = fn(tensor, **fn_kwargs) + transformed_p_list = fn(pil_img, **fn_kwargs) + self.assertEqual(len(transformed_t_list), len(transformed_p_list)) + self.assertEqual(len(transformed_t_list), out_length) + for transformed_tensor, transformed_pil_img in zip(transformed_t_list, transformed_p_list): + self.compareTensorToPIL(transformed_tensor, transformed_pil_img) + + transformed_t_list_script = scripted_fn(tensor.detach().clone(), **fn_kwargs) + self.assertEqual(len(transformed_t_list), len(transformed_t_list_script)) + self.assertEqual(len(transformed_t_list_script), out_length) + for transformed_tensor, transformed_tensor_script in zip(transformed_t_list, transformed_t_list_script): + self.assertTrue(transformed_tensor.equal(transformed_tensor_script), + msg="{} vs {}".format(transformed_tensor, transformed_tensor_script)) + + # test for class interface + fn = getattr(T, method)(**meth_kwargs) + scripted_fn = torch.jit.script(fn) + output = scripted_fn(tensor) + self.assertEqual(len(output), len(transformed_t_list_script)) + + # test on batch of tensors + batch_tensors = self._create_data_batch(height=23, width=34, channels=3, num_samples=4, device=self.device) + torch.manual_seed(12) + transformed_batch_list = fn(batch_tensors) + + for i in range(len(batch_tensors)): + img_tensor = batch_tensors[i, ...] + torch.manual_seed(12) + transformed_img_list = fn(img_tensor) + for transformed_img, transformed_batch in zip(transformed_img_list, transformed_batch_list): + self.assertTrue(transformed_img.equal(transformed_batch[i, ...]), + msg="{} vs {}".format(transformed_img, transformed_batch[i, ...])) + + with get_tmp_dir() as tmp_dir: + scripted_fn.save(os.path.join(tmp_dir, "t_op_list_{}.pt".format(method))) + + def test_five_crop(self): + fn_kwargs = meth_kwargs = {"size": (5,)} + self._test_op_list_output( + "five_crop", "FiveCrop", out_length=5, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + fn_kwargs = meth_kwargs = {"size": [5, ]} + self._test_op_list_output( + "five_crop", "FiveCrop", out_length=5, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + fn_kwargs = meth_kwargs = {"size": (4, 5)} + self._test_op_list_output( + "five_crop", "FiveCrop", out_length=5, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + fn_kwargs = meth_kwargs = {"size": [4, 5]} + self._test_op_list_output( + "five_crop", "FiveCrop", out_length=5, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + + def test_ten_crop(self): + fn_kwargs = meth_kwargs = {"size": (5,)} + self._test_op_list_output( + "ten_crop", "TenCrop", out_length=10, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + fn_kwargs = meth_kwargs = {"size": [5, ]} + self._test_op_list_output( + "ten_crop", "TenCrop", out_length=10, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + fn_kwargs = meth_kwargs = {"size": (4, 5)} + self._test_op_list_output( + "ten_crop", "TenCrop", out_length=10, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + fn_kwargs = meth_kwargs = {"size": [4, 5]} + self._test_op_list_output( + "ten_crop", "TenCrop", out_length=10, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + + def test_resize(self): + + # TODO: Minimal check for bug-fix, improve this later + x = torch.rand(3, 32, 46) + t = T.Resize(size=38) + y = t(x) + # If size is an int, smaller edge of the image will be matched to this number. + # i.e, if height > width, then image will be rescaled to (size * height / width, size). + self.assertTrue(isinstance(y, torch.Tensor)) + self.assertEqual(y.shape[1], 38) + self.assertEqual(y.shape[2], int(38 * 46 / 32)) + + tensor, _ = self._create_data(height=34, width=36, device=self.device) + batch_tensors = torch.randint(0, 256, size=(4, 3, 44, 56), dtype=torch.uint8, device=self.device) + + for dt in [None, torch.float32, torch.float64]: + if dt is not None: + # This is a trivial cast to float of uint8 data to test all cases + tensor = tensor.to(dt) + for size in [32, 34, [32, ], [32, 32], (32, 32), [34, 35]]: + for max_size in (None, 35, 1000): + if max_size is not None and isinstance(size, Sequence) and len(size) != 1: + continue # Not supported + for interpolation in [BILINEAR, BICUBIC, NEAREST]: + + if isinstance(size, int): + script_size = [size, ] + else: + script_size = size + + transform = T.Resize(size=script_size, interpolation=interpolation, max_size=max_size) + s_transform = torch.jit.script(transform) + self._test_transform_vs_scripted(transform, s_transform, tensor) + self._test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors) + + with get_tmp_dir() as tmp_dir: + s_transform.save(os.path.join(tmp_dir, "t_resize.pt")) + + def test_resized_crop(self): + tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=self.device) + batch_tensors = torch.randint(0, 256, size=(4, 3, 44, 56), dtype=torch.uint8, device=self.device) + + for scale in [(0.7, 1.2), [0.7, 1.2]]: + for ratio in [(0.75, 1.333), [0.75, 1.333]]: + for size in [(32, ), [44, ], [32, ], [32, 32], (32, 32), [44, 55]]: + for interpolation in [NEAREST, BILINEAR, BICUBIC]: + transform = T.RandomResizedCrop( + size=size, scale=scale, ratio=ratio, interpolation=interpolation + ) + s_transform = torch.jit.script(transform) + self._test_transform_vs_scripted(transform, s_transform, tensor) + self._test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors) + + with get_tmp_dir() as tmp_dir: + s_transform.save(os.path.join(tmp_dir, "t_resized_crop.pt")) + + def test_random_affine(self): + tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=self.device) + batch_tensors = torch.randint(0, 256, size=(4, 3, 44, 56), dtype=torch.uint8, device=self.device) + + for shear in [15, 10.0, (5.0, 10.0), [-15, 15], [-10.0, 10.0, -11.0, 11.0]]: + for scale in [(0.7, 1.2), [0.7, 1.2]]: + for translate in [(0.1, 0.2), [0.2, 0.1]]: + for degrees in [45, 35.0, (-45, 45), [-90.0, 90.0]]: + for interpolation in [NEAREST, BILINEAR]: + for fill in [85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1, ], 1]: + transform = T.RandomAffine( + degrees=degrees, translate=translate, + scale=scale, shear=shear, interpolation=interpolation, fill=fill + ) + s_transform = torch.jit.script(transform) + + self._test_transform_vs_scripted(transform, s_transform, tensor) + self._test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors) + + with get_tmp_dir() as tmp_dir: + s_transform.save(os.path.join(tmp_dir, "t_random_affine.pt")) + + def test_random_rotate(self): + tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=self.device) + batch_tensors = torch.randint(0, 256, size=(4, 3, 44, 56), dtype=torch.uint8, device=self.device) + + for center in [(0, 0), [10, 10], None, (56, 44)]: + for expand in [True, False]: + for degrees in [45, 35.0, (-45, 45), [-90.0, 90.0]]: + for interpolation in [NEAREST, BILINEAR]: + for fill in [85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1, ], 1]: + transform = T.RandomRotation( + degrees=degrees, interpolation=interpolation, expand=expand, center=center, fill=fill + ) + s_transform = torch.jit.script(transform) + + self._test_transform_vs_scripted(transform, s_transform, tensor) + self._test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors) + + with get_tmp_dir() as tmp_dir: + s_transform.save(os.path.join(tmp_dir, "t_random_rotate.pt")) + + def test_random_perspective(self): + tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=self.device) + batch_tensors = torch.randint(0, 256, size=(4, 3, 44, 56), dtype=torch.uint8, device=self.device) + + for distortion_scale in np.linspace(0.1, 1.0, num=20): + for interpolation in [NEAREST, BILINEAR]: + for fill in [85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1, ], 1]: + transform = T.RandomPerspective( + distortion_scale=distortion_scale, + interpolation=interpolation, + fill=fill + ) + s_transform = torch.jit.script(transform) + + self._test_transform_vs_scripted(transform, s_transform, tensor) + self._test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors) + + with get_tmp_dir() as tmp_dir: + s_transform.save(os.path.join(tmp_dir, "t_perspective.pt")) + + def test_to_grayscale(self): + + meth_kwargs = {"num_output_channels": 1} + tol = 1.0 + 1e-10 + self._test_class_op( + "Grayscale", meth_kwargs=meth_kwargs, test_exact_match=False, tol=tol, agg_method="max" + ) + + meth_kwargs = {"num_output_channels": 3} + self._test_class_op( + "Grayscale", meth_kwargs=meth_kwargs, test_exact_match=False, tol=tol, agg_method="max" + ) + + meth_kwargs = {} + self._test_class_op( + "RandomGrayscale", meth_kwargs=meth_kwargs, test_exact_match=False, tol=tol, agg_method="max" + ) + + def test_normalize(self): + fn = T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) + tensor, _ = self._create_data(26, 34, device=self.device) + + with self.assertRaisesRegex(TypeError, r"Input tensor should be a float tensor"): + fn(tensor) + + batch_tensors = torch.rand(4, 3, 44, 56, device=self.device) + tensor = tensor.to(dtype=torch.float32) / 255.0 + # test for class interface + scripted_fn = torch.jit.script(fn) + + self._test_transform_vs_scripted(fn, scripted_fn, tensor) + self._test_transform_vs_scripted_on_batch(fn, scripted_fn, batch_tensors) + + with get_tmp_dir() as tmp_dir: + scripted_fn.save(os.path.join(tmp_dir, "t_norm.pt")) + + def test_linear_transformation(self): + c, h, w = 3, 24, 32 + + tensor, _ = self._create_data(h, w, channels=c, device=self.device) + + matrix = torch.rand(c * h * w, c * h * w, device=self.device) + mean_vector = torch.rand(c * h * w, device=self.device) + + fn = T.LinearTransformation(matrix, mean_vector) + scripted_fn = torch.jit.script(fn) + + self._test_transform_vs_scripted(fn, scripted_fn, tensor) + + batch_tensors = torch.rand(4, c, h, w, device=self.device) + # We skip some tests from _test_transform_vs_scripted_on_batch as + # results for scripted and non-scripted transformations are not exactly the same + torch.manual_seed(12) + transformed_batch = fn(batch_tensors) + torch.manual_seed(12) + s_transformed_batch = scripted_fn(batch_tensors) + self.assertTrue(transformed_batch.equal(s_transformed_batch)) + + with get_tmp_dir() as tmp_dir: + scripted_fn.save(os.path.join(tmp_dir, "t_norm.pt")) + + def test_compose(self): + tensor, _ = self._create_data(26, 34, device=self.device) + tensor = tensor.to(dtype=torch.float32) / 255.0 + + transforms = T.Compose([ + T.CenterCrop(10), + T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + ]) + s_transforms = torch.nn.Sequential(*transforms.transforms) + + scripted_fn = torch.jit.script(s_transforms) + torch.manual_seed(12) + transformed_tensor = transforms(tensor) + torch.manual_seed(12) + transformed_tensor_script = scripted_fn(tensor) + self.assertTrue(transformed_tensor.equal(transformed_tensor_script), msg="{}".format(transforms)) + + t = T.Compose([ + lambda x: x, + ]) + with self.assertRaisesRegex(RuntimeError, r"Could not get name of python class object"): + torch.jit.script(t) + + def test_random_apply(self): + tensor, _ = self._create_data(26, 34, device=self.device) + tensor = tensor.to(dtype=torch.float32) / 255.0 + + transforms = T.RandomApply([ + T.RandomHorizontalFlip(), + T.ColorJitter(), + ], p=0.4) + s_transforms = T.RandomApply(torch.nn.ModuleList([ + T.RandomHorizontalFlip(), + T.ColorJitter(), + ]), p=0.4) + + scripted_fn = torch.jit.script(s_transforms) + torch.manual_seed(12) + transformed_tensor = transforms(tensor) + torch.manual_seed(12) + transformed_tensor_script = scripted_fn(tensor) + self.assertTrue(transformed_tensor.equal(transformed_tensor_script), msg="{}".format(transforms)) + + if torch.device(self.device).type == "cpu": + # Can't check this twice, otherwise + # "Can't redefine method: forward on class: __torch__.torchvision.transforms.transforms.RandomApply" + transforms = T.RandomApply([ + T.ColorJitter(), + ], p=0.3) + with self.assertRaisesRegex(RuntimeError, r"Module 'RandomApply' has no attribute 'transforms'"): + torch.jit.script(transforms) + + def test_gaussian_blur(self): + tol = 1.0 + 1e-10 + self._test_class_op( + "GaussianBlur", meth_kwargs={"kernel_size": 3, "sigma": 0.75}, + test_exact_match=False, agg_method="max", tol=tol + ) + + self._test_class_op( + "GaussianBlur", meth_kwargs={"kernel_size": 23, "sigma": [0.1, 2.0]}, + test_exact_match=False, agg_method="max", tol=tol + ) + + self._test_class_op( + "GaussianBlur", meth_kwargs={"kernel_size": 23, "sigma": (0.1, 2.0)}, + test_exact_match=False, agg_method="max", tol=tol + ) + + self._test_class_op( + "GaussianBlur", meth_kwargs={"kernel_size": [3, 3], "sigma": (1.0, 1.0)}, + test_exact_match=False, agg_method="max", tol=tol + ) + + self._test_class_op( + "GaussianBlur", meth_kwargs={"kernel_size": (3, 3), "sigma": (0.1, 2.0)}, + test_exact_match=False, agg_method="max", tol=tol + ) + + self._test_class_op( + "GaussianBlur", meth_kwargs={"kernel_size": [23], "sigma": 0.75}, + test_exact_match=False, agg_method="max", tol=tol + ) + + def test_random_erasing(self): + img = torch.rand(3, 60, 60) + + # Test Set 0: invalid value + random_erasing = T.RandomErasing(value=(0.1, 0.2, 0.3, 0.4), p=1.0) + with self.assertRaises(ValueError, msg="If value is a sequence, it should have either a single value or 3"): + random_erasing(img) + + tensor, _ = self._create_data(24, 32, channels=3, device=self.device) + batch_tensors = torch.rand(4, 3, 44, 56, device=self.device) + + test_configs = [ + {"value": 0.2}, + {"value": "random"}, + {"value": (0.2, 0.2, 0.2)}, + {"value": "random", "ratio": (0.1, 0.2)}, + ] + + for config in test_configs: + fn = T.RandomErasing(**config) + scripted_fn = torch.jit.script(fn) + self._test_transform_vs_scripted(fn, scripted_fn, tensor) + self._test_transform_vs_scripted_on_batch(fn, scripted_fn, batch_tensors) + + with get_tmp_dir() as tmp_dir: + scripted_fn.save(os.path.join(tmp_dir, "t_random_erasing.pt")) + + def test_convert_image_dtype(self): + tensor, _ = self._create_data(26, 34, device=self.device) + batch_tensors = torch.rand(4, 3, 44, 56, device=self.device) + + for in_dtype in int_dtypes() + float_dtypes(): + in_tensor = tensor.to(in_dtype) + in_batch_tensors = batch_tensors.to(in_dtype) + for out_dtype in int_dtypes() + float_dtypes(): + + fn = T.ConvertImageDtype(dtype=out_dtype) + scripted_fn = torch.jit.script(fn) + + if (in_dtype == torch.float32 and out_dtype in (torch.int32, torch.int64)) or \ + (in_dtype == torch.float64 and out_dtype == torch.int64): + with self.assertRaisesRegex(RuntimeError, r"cannot be performed safely"): + self._test_transform_vs_scripted(fn, scripted_fn, in_tensor) + with self.assertRaisesRegex(RuntimeError, r"cannot be performed safely"): + self._test_transform_vs_scripted_on_batch(fn, scripted_fn, in_batch_tensors) + continue + + self._test_transform_vs_scripted(fn, scripted_fn, in_tensor) + self._test_transform_vs_scripted_on_batch(fn, scripted_fn, in_batch_tensors) + + with get_tmp_dir() as tmp_dir: + scripted_fn.save(os.path.join(tmp_dir, "t_convert_dtype.pt")) + + def test_autoaugment(self): + tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=self.device) + batch_tensors = torch.randint(0, 256, size=(4, 3, 44, 56), dtype=torch.uint8, device=self.device) + + s_transform = None + for policy in T.AutoAugmentPolicy: + for fill in [None, 85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1, ], 1]: + transform = T.AutoAugment(policy=policy, fill=fill) + s_transform = torch.jit.script(transform) + for _ in range(100): + self._test_transform_vs_scripted(transform, s_transform, tensor) + self._test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors) + + if s_transform is not None: + with get_tmp_dir() as tmp_dir: + s_transform.save(os.path.join(tmp_dir, "t_autoaugment.pt")) + + +@unittest.skipIf(not torch.cuda.is_available(), reason="Skip if no CUDA device") +class CUDATester(Tester): + + def setUp(self): + torch.set_deterministic(False) + self.device = "cuda" + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_transforms_video.py b/test/test_transforms_video.py index 296d519f5c4..e0c7ab5260b 100644 --- a/test/test_transforms_video.py +++ b/test/test_transforms_video.py @@ -1,10 +1,9 @@ -from __future__ import division import torch -import torchvision.transforms._transforms_video as transforms from torchvision.transforms import Compose import unittest import random import numpy as np +import warnings try: from scipy import stats @@ -12,6 +11,11 @@ stats = None +with warnings.catch_warnings(record=True): + warnings.simplefilter("always") + import torchvision.transforms._transforms_video as transforms + + class TestVideoTransforms(unittest.TestCase): def test_random_crop_video(self): diff --git a/test/test_utils.py b/test/test_utils.py index f1982130f75..8c4cc620229 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,3 +1,4 @@ +import numpy as np import os import sys import tempfile @@ -6,7 +7,37 @@ import unittest from io import BytesIO import torchvision.transforms.functional as F -from PIL import Image +from PIL import Image, __version__ as PILLOW_VERSION + + +PILLOW_VERSION = tuple(int(x) for x in PILLOW_VERSION.split('.')) + +boxes = torch.tensor([[0, 0, 20, 20], [0, 0, 0, 0], + [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float) + +masks = torch.tensor([ + [ + [-2.2799, -2.2799, -2.2799, -2.2799, -2.2799], + [5.0914, 5.0914, 5.0914, 5.0914, 5.0914], + [-2.2799, -2.2799, -2.2799, -2.2799, -2.2799], + [-2.2799, -2.2799, -2.2799, -2.2799, -2.2799], + [-2.2799, -2.2799, -2.2799, -2.2799, -2.2799] + ], + [ + [5.0914, 5.0914, 5.0914, 5.0914, 5.0914], + [-2.2799, -2.2799, -2.2799, -2.2799, -2.2799], + [5.0914, 5.0914, 5.0914, 5.0914, 5.0914], + [5.0914, 5.0914, 5.0914, 5.0914, 5.0914], + [-1.4541, -1.4541, -1.4541, -1.4541, -1.4541] + ], + [ + [-1.4541, -1.4541, -1.4541, -1.4541, -1.4541], + [-1.4541, -1.4541, -1.4541, -1.4541, -1.4541], + [-1.4541, -1.4541, -1.4541, -1.4541, -1.4541], + [-1.4541, -1.4541, -1.4541, -1.4541, -1.4541], + [5.0914, 5.0914, 5.0914, 5.0914, 5.0914], + ] +], dtype=torch.float) class Tester(unittest.TestCase): @@ -41,21 +72,21 @@ def test_normalize_in_make_grid(self): self.assertTrue(torch.equal(norm_max, rounded_grid_max), 'Normalized max is not equal to 1') self.assertTrue(torch.equal(norm_min, rounded_grid_min), 'Normalized min is not equal to 0') - @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows') + @unittest.skipIf(sys.platform in ('win32', 'cygwin'), 'temporarily disabled on Windows') def test_save_image(self): with tempfile.NamedTemporaryFile(suffix='.png') as f: t = torch.rand(2, 3, 64, 64) utils.save_image(t, f.name) self.assertTrue(os.path.exists(f.name), 'The image is not present after save') - @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows') + @unittest.skipIf(sys.platform in ('win32', 'cygwin'), 'temporarily disabled on Windows') def test_save_image_single_pixel(self): with tempfile.NamedTemporaryFile(suffix='.png') as f: t = torch.rand(1, 3, 1, 1) utils.save_image(t, f.name) self.assertTrue(os.path.exists(f.name), 'The pixel image is not present after save') - @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows') + @unittest.skipIf(sys.platform in ('win32', 'cygwin'), 'temporarily disabled on Windows') def test_save_image_file_object(self): with tempfile.NamedTemporaryFile(suffix='.png') as f: t = torch.rand(2, 3, 64, 64) @@ -67,7 +98,7 @@ def test_save_image_file_object(self): self.assertTrue(torch.equal(F.to_tensor(img_orig), F.to_tensor(img_bytes)), 'Image not stored in file object') - @unittest.skipIf('win' in sys.platform, 'temporarily disabled on Windows') + @unittest.skipIf(sys.platform in ('win32', 'cygwin'), 'temporarily disabled on Windows') def test_save_image_single_pixel_file_object(self): with tempfile.NamedTemporaryFile(suffix='.png') as f: t = torch.rand(1, 3, 1, 1) @@ -79,6 +110,105 @@ def test_save_image_single_pixel_file_object(self): self.assertTrue(torch.equal(F.to_tensor(img_orig), F.to_tensor(img_bytes)), 'Pixel Image not stored in file object') + def test_draw_boxes(self): + img = torch.full((3, 100, 100), 255, dtype=torch.uint8) + img_cp = img.clone() + boxes_cp = boxes.clone() + labels = ["a", "b", "c", "d"] + colors = ["green", "#FF00FF", (0, 255, 0), "red"] + result = utils.draw_bounding_boxes(img, boxes, labels=labels, colors=colors, fill=True) + + path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "fakedata", "draw_boxes_util.png") + if not os.path.exists(path): + res = Image.fromarray(result.permute(1, 2, 0).contiguous().numpy()) + res.save(path) + + if PILLOW_VERSION >= (8, 2): + # The reference image is only valid for new PIL versions + expected = torch.as_tensor(np.array(Image.open(path))).permute(2, 0, 1) + self.assertTrue(torch.equal(result, expected)) + + # Check if modification is not in place + self.assertTrue(torch.all(torch.eq(boxes, boxes_cp)).item()) + self.assertTrue(torch.all(torch.eq(img, img_cp)).item()) + + def test_draw_boxes_vanilla(self): + img = torch.full((3, 100, 100), 0, dtype=torch.uint8) + img_cp = img.clone() + boxes_cp = boxes.clone() + result = utils.draw_bounding_boxes(img, boxes, fill=False, width=7) + + path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "fakedata", "draw_boxes_vanilla.png") + if not os.path.exists(path): + res = Image.fromarray(result.permute(1, 2, 0).contiguous().numpy()) + res.save(path) + + expected = torch.as_tensor(np.array(Image.open(path))).permute(2, 0, 1) + self.assertTrue(torch.equal(result, expected)) + # Check if modification is not in place + self.assertTrue(torch.all(torch.eq(boxes, boxes_cp)).item()) + self.assertTrue(torch.all(torch.eq(img, img_cp)).item()) + + def test_draw_invalid_boxes(self): + img_tp = ((1, 1, 1), (1, 2, 3)) + img_wrong1 = torch.full((3, 5, 5), 255, dtype=torch.float) + img_wrong2 = torch.full((1, 3, 5, 5), 255, dtype=torch.uint8) + boxes = torch.tensor([[0, 0, 20, 20], [0, 0, 0, 0], + [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float) + self.assertRaises(TypeError, utils.draw_bounding_boxes, img_tp, boxes) + self.assertRaises(ValueError, utils.draw_bounding_boxes, img_wrong1, boxes) + self.assertRaises(ValueError, utils.draw_bounding_boxes, img_wrong2, boxes) + + def test_draw_segmentation_masks_colors(self): + img = torch.full((3, 5, 5), 255, dtype=torch.uint8) + img_cp = img.clone() + masks_cp = masks.clone() + colors = ["#FF00FF", (0, 255, 0), "red"] + result = utils.draw_segmentation_masks(img, masks, colors=colors) + + path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", + "fakedata", "draw_segm_masks_colors_util.png") + + if not os.path.exists(path): + res = Image.fromarray(result.permute(1, 2, 0).contiguous().numpy()) + res.save(path) + + expected = torch.as_tensor(np.array(Image.open(path))).permute(2, 0, 1) + self.assertTrue(torch.equal(result, expected)) + # Check if modification is not in place + self.assertTrue(torch.all(torch.eq(img, img_cp)).item()) + self.assertTrue(torch.all(torch.eq(masks, masks_cp)).item()) + + def test_draw_segmentation_masks_no_colors(self): + img = torch.full((3, 20, 20), 255, dtype=torch.uint8) + img_cp = img.clone() + masks_cp = masks.clone() + result = utils.draw_segmentation_masks(img, masks, colors=None) + + path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", + "fakedata", "draw_segm_masks_no_colors_util.png") + + if not os.path.exists(path): + res = Image.fromarray(result.permute(1, 2, 0).contiguous().numpy()) + res.save(path) + + expected = torch.as_tensor(np.array(Image.open(path))).permute(2, 0, 1) + self.assertTrue(torch.equal(result, expected)) + # Check if modification is not in place + self.assertTrue(torch.all(torch.eq(img, img_cp)).item()) + self.assertTrue(torch.all(torch.eq(masks, masks_cp)).item()) + + def test_draw_invalid_masks(self): + img_tp = ((1, 1, 1), (1, 2, 3)) + img_wrong1 = torch.full((3, 5, 5), 255, dtype=torch.float) + img_wrong2 = torch.full((1, 3, 5, 5), 255, dtype=torch.uint8) + img_wrong3 = torch.full((4, 5, 5), 255, dtype=torch.uint8) + + self.assertRaises(TypeError, utils.draw_segmentation_masks, img_tp, masks) + self.assertRaises(ValueError, utils.draw_segmentation_masks, img_wrong1, masks) + self.assertRaises(ValueError, utils.draw_segmentation_masks, img_wrong2, masks) + self.assertRaises(ValueError, utils.draw_segmentation_masks, img_wrong3, masks) + if __name__ == '__main__': unittest.main() diff --git a/test/test_video_reader.py b/test/test_video_reader.py index bf59eb7dc4d..5b9b2184daf 100644 --- a/test/test_video_reader.py +++ b/test/test_video_reader.py @@ -1,31 +1,28 @@ import collections -from common_utils import get_tmp_dir -from fractions import Fraction import math -import numpy as np import os -import sys import time +import unittest +from fractions import Fraction + +import numpy as np import torch import torchvision.io as io -import unittest from numpy.random import randint +from torchvision.io import _HAS_VIDEO_OPT +from common_utils import PY39_SKIP + try: import av + # Do a version test too io.video._check_av_available() except ImportError: av = None -if sys.version_info < (3,): - from urllib2 import URLError -else: - from urllib.error import URLError - - -from torchvision.io import _HAS_VIDEO_OPT +from urllib.error import URLError VIDEO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "videos") @@ -39,10 +36,7 @@ "check_aframes", "check_aframe_pts", ] -GroundTruth = collections.namedtuple( - "GroundTruth", - " ".join(CheckerConfig) -) +GroundTruth = collections.namedtuple("GroundTruth", " ".join(CheckerConfig)) all_check_config = GroundTruth( duration=0, @@ -193,9 +187,9 @@ def _decode_frames_by_av_module( frames are read """ if video_end_pts is None: - video_end_pts = float('inf') + video_end_pts = float("inf") if audio_end_pts is None: - audio_end_pts = float('inf') + audio_end_pts = float("inf") container = av.open(full_path) video_frames = [] @@ -282,8 +276,10 @@ class TestVideoReader(unittest.TestCase): def check_separate_decoding_result(self, tv_result, config): """check the decoding results from TorchVision decoder """ - vframes, vframe_pts, vtimebase, vfps, vduration, aframes, aframe_pts, \ - atimebase, asample_rate, aduration = tv_result + vframes, vframe_pts, vtimebase, vfps, vduration, \ + aframes, aframe_pts, atimebase, asample_rate, aduration = ( + tv_result + ) video_duration = vduration.item() * Fraction( vtimebase[0].item(), vtimebase[1].item() @@ -321,6 +317,13 @@ def check_probe_result(self, result, config): ) self.assertAlmostEqual(audio_duration, config.duration, delta=0.5) + def check_meta_result(self, result, config): + self.assertAlmostEqual(result.video_duration, config.duration, delta=0.5) + self.assertAlmostEqual(result.video_fps, config.video_fps, delta=0.5) + if result.has_audio > 0: + self.assertEqual(result.audio_sample_rate, config.audio_sample_rate) + self.assertAlmostEqual(result.audio_duration, config.duration, delta=0.5) + def compare_decoding_result(self, tv_result, ref_result, config=all_check_config): """ Compare decoding results from two sources. @@ -330,8 +333,10 @@ def compare_decoding_result(self, tv_result, ref_result, config=all_check_config decoder or TorchVision decoder with getPtsOnly = 1 config: config of decoding results checker """ - vframes, vframe_pts, vtimebase, _vfps, _vduration, aframes, aframe_pts, \ - atimebase, _asample_rate, _aduration = tv_result + vframes, vframe_pts, vtimebase, _vfps, _vduration, \ + aframes, aframe_pts, atimebase, _asample_rate, _aduration = ( + tv_result + ) if isinstance(ref_result, list): # the ref_result is from new video_reader decoder ref_result = DecoderResult( @@ -344,22 +349,34 @@ def compare_decoding_result(self, tv_result, ref_result, config=all_check_config ) if vframes.numel() > 0 and ref_result.vframes.numel() > 0: - mean_delta = torch.mean(torch.abs(vframes.float() - ref_result.vframes.float())) + mean_delta = torch.mean( + torch.abs(vframes.float() - ref_result.vframes.float()) + ) self.assertAlmostEqual(mean_delta, 0, delta=8.0) - mean_delta = torch.mean(torch.abs(vframe_pts.float() - ref_result.vframe_pts.float())) + mean_delta = torch.mean( + torch.abs(vframe_pts.float() - ref_result.vframe_pts.float()) + ) self.assertAlmostEqual(mean_delta, 0, delta=1.0) is_same = torch.all(torch.eq(vtimebase, ref_result.vtimebase)).item() self.assertEqual(is_same, True) - if config.check_aframes and aframes.numel() > 0 and ref_result.aframes.numel() > 0: + if ( + config.check_aframes + and aframes.numel() > 0 + and ref_result.aframes.numel() > 0 + ): """Audio stream is available and audio frame is required to return from decoder""" is_same = torch.all(torch.eq(aframes, ref_result.aframes)).item() self.assertEqual(is_same, True) - if config.check_aframe_pts and aframe_pts.numel() > 0 and ref_result.aframe_pts.numel() > 0: + if ( + config.check_aframe_pts + and aframe_pts.numel() > 0 + and ref_result.aframe_pts.numel() > 0 + ): """Audio stream is available""" is_same = torch.all(torch.eq(aframe_pts, ref_result.aframe_pts)).item() self.assertEqual(is_same, True) @@ -375,7 +392,7 @@ def compare_decoding_result(self, tv_result, ref_result, config=all_check_config def test_stress_test_read_video_from_file(self): num_iter = 10000 # video related - width, height, min_dimension = 0, 0, 0 + width, height, min_dimension, max_dimension = 0, 0, 0, 0 video_start_pts, video_end_pts = 0, -1 video_timebase_num, video_timebase_den = 0, 1 # audio related @@ -396,6 +413,7 @@ def test_stress_test_read_video_from_file(self): width, height, min_dimension, + max_dimension, video_start_pts, video_end_pts, video_timebase_num, @@ -409,12 +427,13 @@ def test_stress_test_read_video_from_file(self): audio_timebase_den, ) + @PY39_SKIP def test_read_video_from_file(self): """ Test the case when decoder starts with a video file to decode frames. """ # video related - width, height, min_dimension = 0, 0, 0 + width, height, min_dimension, max_dimension = 0, 0, 0, 0 video_start_pts, video_end_pts = 0, -1 video_timebase_num, video_timebase_den = 0, 1 # audio related @@ -434,6 +453,7 @@ def test_read_video_from_file(self): width, height, min_dimension, + max_dimension, video_start_pts, video_end_pts, video_timebase_num, @@ -453,13 +473,14 @@ def test_read_video_from_file(self): # compare decoding results self.compare_decoding_result(tv_result, pyav_result, config) + @PY39_SKIP def test_read_video_from_file_read_single_stream_only(self): """ Test the case when decoder starts with a video file to decode frames, and only reads video stream and ignores audio stream """ # video related - width, height, min_dimension = 0, 0, 0 + width, height, min_dimension, max_dimension = 0, 0, 0, 0 video_start_pts, video_end_pts = 0, -1 video_timebase_num, video_timebase_den = 0, 1 # audio related @@ -479,6 +500,7 @@ def test_read_video_from_file_read_single_stream_only(self): width, height, min_dimension, + max_dimension, video_start_pts, video_end_pts, video_timebase_num, @@ -492,15 +514,19 @@ def test_read_video_from_file_read_single_stream_only(self): audio_timebase_den, ) - vframes, vframe_pts, vtimebase, vfps, vduration, aframes, aframe_pts, \ - atimebase, asample_rate, aduration = tv_result + vframes, vframe_pts, vtimebase, vfps, vduration, \ + aframes, aframe_pts, atimebase, asample_rate, aduration = ( + tv_result + ) self.assertEqual(vframes.numel() > 0, readVideoStream) self.assertEqual(vframe_pts.numel() > 0, readVideoStream) self.assertEqual(vtimebase.numel() > 0, readVideoStream) self.assertEqual(vfps.numel() > 0, readVideoStream) - expect_audio_data = readAudioStream == 1 and config.audio_sample_rate is not None + expect_audio_data = ( + readAudioStream == 1 and config.audio_sample_rate is not None + ) self.assertEqual(aframes.numel() > 0, expect_audio_data) self.assertEqual(aframe_pts.numel() > 0, expect_audio_data) self.assertEqual(atimebase.numel() > 0, expect_audio_data) @@ -512,7 +538,49 @@ def test_read_video_from_file_rescale_min_dimension(self): video min dimension between height and width is set. """ # video related - width, height, min_dimension = 0, 0, 128 + width, height, min_dimension, max_dimension = 0, 0, 128, 0 + video_start_pts, video_end_pts = 0, -1 + video_timebase_num, video_timebase_den = 0, 1 + # audio related + samples, channels = 0, 0 + audio_start_pts, audio_end_pts = 0, -1 + audio_timebase_num, audio_timebase_den = 0, 1 + + for test_video, _config in test_videos.items(): + full_path = os.path.join(VIDEO_DIR, test_video) + + tv_result = torch.ops.video_reader.read_video_from_file( + full_path, + seek_frame_margin, + 0, # getPtsOnly + 1, # readVideoStream + width, + height, + min_dimension, + max_dimension, + video_start_pts, + video_end_pts, + video_timebase_num, + video_timebase_den, + 1, # readAudioStream + samples, + channels, + audio_start_pts, + audio_end_pts, + audio_timebase_num, + audio_timebase_den, + ) + self.assertEqual( + min_dimension, min(tv_result[0].size(1), tv_result[0].size(2)) + ) + + def test_read_video_from_file_rescale_max_dimension(self): + """ + Test the case when decoder starts with a video file to decode frames, and + video min dimension between height and width is set. + """ + # video related + width, height, min_dimension, max_dimension = 0, 0, 0, 85 video_start_pts, video_end_pts = 0, -1 video_timebase_num, video_timebase_den = 0, 1 # audio related @@ -531,6 +599,7 @@ def test_read_video_from_file_rescale_min_dimension(self): width, height, min_dimension, + max_dimension, video_start_pts, video_end_pts, video_timebase_num, @@ -543,7 +612,54 @@ def test_read_video_from_file_rescale_min_dimension(self): audio_timebase_num, audio_timebase_den, ) - self.assertEqual(min_dimension, min(tv_result[0].size(1), tv_result[0].size(2))) + self.assertEqual( + max_dimension, max(tv_result[0].size(1), tv_result[0].size(2)) + ) + + def test_read_video_from_file_rescale_both_min_max_dimension(self): + """ + Test the case when decoder starts with a video file to decode frames, and + video min dimension between height and width is set. + """ + # video related + width, height, min_dimension, max_dimension = 0, 0, 64, 85 + video_start_pts, video_end_pts = 0, -1 + video_timebase_num, video_timebase_den = 0, 1 + # audio related + samples, channels = 0, 0 + audio_start_pts, audio_end_pts = 0, -1 + audio_timebase_num, audio_timebase_den = 0, 1 + + for test_video, _config in test_videos.items(): + full_path = os.path.join(VIDEO_DIR, test_video) + + tv_result = torch.ops.video_reader.read_video_from_file( + full_path, + seek_frame_margin, + 0, # getPtsOnly + 1, # readVideoStream + width, + height, + min_dimension, + max_dimension, + video_start_pts, + video_end_pts, + video_timebase_num, + video_timebase_den, + 1, # readAudioStream + samples, + channels, + audio_start_pts, + audio_end_pts, + audio_timebase_num, + audio_timebase_den, + ) + self.assertEqual( + min_dimension, min(tv_result[0].size(1), tv_result[0].size(2)) + ) + self.assertEqual( + max_dimension, max(tv_result[0].size(1), tv_result[0].size(2)) + ) def test_read_video_from_file_rescale_width(self): """ @@ -551,7 +667,7 @@ def test_read_video_from_file_rescale_width(self): video width is set. """ # video related - width, height, min_dimension = 256, 0, 0 + width, height, min_dimension, max_dimension = 256, 0, 0, 0 video_start_pts, video_end_pts = 0, -1 video_timebase_num, video_timebase_den = 0, 1 # audio related @@ -570,6 +686,7 @@ def test_read_video_from_file_rescale_width(self): width, height, min_dimension, + max_dimension, video_start_pts, video_end_pts, video_timebase_num, @@ -590,7 +707,7 @@ def test_read_video_from_file_rescale_height(self): video height is set. """ # video related - width, height, min_dimension = 0, 224, 0 + width, height, min_dimension, max_dimension = 0, 224, 0, 0 video_start_pts, video_end_pts = 0, -1 video_timebase_num, video_timebase_den = 0, 1 # audio related @@ -609,6 +726,7 @@ def test_read_video_from_file_rescale_height(self): width, height, min_dimension, + max_dimension, video_start_pts, video_end_pts, video_timebase_num, @@ -629,7 +747,7 @@ def test_read_video_from_file_rescale_width_and_height(self): both video height and width are set. """ # video related - width, height, min_dimension = 320, 240, 0 + width, height, min_dimension, max_dimension = 320, 240, 0, 0 video_start_pts, video_end_pts = 0, -1 video_timebase_num, video_timebase_den = 0, 1 # audio related @@ -648,6 +766,7 @@ def test_read_video_from_file_rescale_width_and_height(self): width, height, min_dimension, + max_dimension, video_start_pts, video_end_pts, video_timebase_num, @@ -663,18 +782,16 @@ def test_read_video_from_file_rescale_width_and_height(self): self.assertEqual(tv_result[0].size(1), height) self.assertEqual(tv_result[0].size(2), width) + @PY39_SKIP def test_read_video_from_file_audio_resampling(self): """ Test the case when decoder starts with a video file to decode frames, and audio waveform are resampled """ - for samples in [ - 9600, # downsampling - 96000, # upsampling - ]: + for samples in [9600, 96000]: # downsampling # upsampling # video related - width, height, min_dimension = 0, 0, 0 + width, height, min_dimension, max_dimension = 0, 0, 0, 0 video_start_pts, video_end_pts = 0, -1 video_timebase_num, video_timebase_den = 0, 1 # audio related @@ -693,6 +810,7 @@ def test_read_video_from_file_audio_resampling(self): width, height, min_dimension, + max_dimension, video_start_pts, video_end_pts, video_timebase_num, @@ -705,25 +823,32 @@ def test_read_video_from_file_audio_resampling(self): audio_timebase_num, audio_timebase_den, ) - vframes, vframe_pts, vtimebase, vfps, vduration, aframes, aframe_pts, \ - atimebase, asample_rate, aduration = tv_result + vframes, vframe_pts, vtimebase, vfps, vduration, \ + aframes, aframe_pts, atimebase, asample_rate, aduration = ( + tv_result + ) if aframes.numel() > 0: self.assertEqual(samples, asample_rate.item()) self.assertEqual(1, aframes.size(1)) # when audio stream is found - duration = float(aframe_pts[-1]) * float(atimebase[0]) / float(atimebase[1]) + duration = ( + float(aframe_pts[-1]) + * float(atimebase[0]) + / float(atimebase[1]) + ) self.assertAlmostEqual( aframes.size(0), int(duration * asample_rate.item()), delta=0.1 * asample_rate.item(), ) + @PY39_SKIP def test_compare_read_video_from_memory_and_file(self): """ Test the case when video is already in memory, and decoder reads data in memory """ # video related - width, height, min_dimension = 0, 0, 0 + width, height, min_dimension, max_dimension = 0, 0, 0, 0 video_start_pts, video_end_pts = 0, -1 video_timebase_num, video_timebase_den = 0, 1 # audio related @@ -743,6 +868,7 @@ def test_compare_read_video_from_memory_and_file(self): width, height, min_dimension, + max_dimension, video_start_pts, video_end_pts, video_timebase_num, @@ -765,6 +891,7 @@ def test_compare_read_video_from_memory_and_file(self): width, height, min_dimension, + max_dimension, video_start_pts, video_end_pts, video_timebase_num, @@ -782,12 +909,13 @@ def test_compare_read_video_from_memory_and_file(self): # finally, compare results decoded from memory and file self.compare_decoding_result(tv_result_memory, tv_result_file) + @PY39_SKIP def test_read_video_from_memory(self): """ Test the case when video is already in memory, and decoder reads data in memory """ # video related - width, height, min_dimension = 0, 0, 0 + width, height, min_dimension, max_dimension = 0, 0, 0, 0 video_start_pts, video_end_pts = 0, -1 video_timebase_num, video_timebase_den = 0, 1 # audio related @@ -807,6 +935,7 @@ def test_read_video_from_memory(self): width, height, min_dimension, + max_dimension, video_start_pts, video_end_pts, video_timebase_num, @@ -825,6 +954,7 @@ def test_read_video_from_memory(self): self.check_separate_decoding_result(tv_result, config) self.compare_decoding_result(tv_result, pyav_result, config) + @PY39_SKIP def test_read_video_from_memory_get_pts_only(self): """ Test the case when video is already in memory, and decoder reads data in memory. @@ -832,7 +962,7 @@ def test_read_video_from_memory_get_pts_only(self): for both pts and frame data """ # video related - width, height, min_dimension = 0, 0, 0 + width, height, min_dimension, max_dimension = 0, 0, 0, 0 video_start_pts, video_end_pts = 0, -1 video_timebase_num, video_timebase_den = 0, 1 # audio related @@ -852,6 +982,7 @@ def test_read_video_from_memory_get_pts_only(self): width, height, min_dimension, + max_dimension, video_start_pts, video_end_pts, video_timebase_num, @@ -875,6 +1006,7 @@ def test_read_video_from_memory_get_pts_only(self): width, height, min_dimension, + max_dimension, video_start_pts, video_end_pts, video_timebase_num, @@ -892,6 +1024,7 @@ def test_read_video_from_memory_get_pts_only(self): self.assertEqual(tv_result_pts_only[5].numel(), 0) self.compare_decoding_result(tv_result, tv_result_pts_only) + @PY39_SKIP def test_read_video_in_range_from_memory(self): """ Test the case when video is already in memory, and decoder reads data in memory. @@ -901,7 +1034,7 @@ def test_read_video_in_range_from_memory(self): for test_video, config in test_videos.items(): full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video) # video related - width, height, min_dimension = 0, 0, 0 + width, height, min_dimension, max_dimension = 0, 0, 0, 0 video_start_pts, video_end_pts = 0, -1 video_timebase_num, video_timebase_den = 0, 1 # audio related @@ -917,6 +1050,7 @@ def test_read_video_in_range_from_memory(self): width, height, min_dimension, + max_dimension, video_start_pts, video_end_pts, video_timebase_num, @@ -929,8 +1063,10 @@ def test_read_video_in_range_from_memory(self): audio_timebase_num, audio_timebase_den, ) - vframes, vframe_pts, vtimebase, vfps, vduration, aframes, aframe_pts, \ - atimebase, asample_rate, aduration = tv_result + vframes, vframe_pts, vtimebase, vfps, vduration, \ + aframes, aframe_pts, atimebase, asample_rate, aduration = ( + tv_result + ) self.assertAlmostEqual(config.video_fps, vfps.item(), delta=0.01) for num_frames in [4, 8, 16, 32, 64, 128]: @@ -969,6 +1105,7 @@ def test_read_video_in_range_from_memory(self): width, height, min_dimension, + max_dimension, video_start_pts, video_end_pts, video_timebase_num, @@ -983,31 +1120,41 @@ def test_read_video_in_range_from_memory(self): ) # pass 3: decode frames in range using PyAv - video_timebase_av, audio_timebase_av = _get_timebase_by_av_module(full_path) + video_timebase_av, audio_timebase_av = _get_timebase_by_av_module( + full_path + ) video_start_pts_av = _pts_convert( video_start_pts.item(), Fraction(video_timebase_num.item(), video_timebase_den.item()), - Fraction(video_timebase_av.numerator, video_timebase_av.denominator), + Fraction( + video_timebase_av.numerator, video_timebase_av.denominator + ), math.floor, ) video_end_pts_av = _pts_convert( video_end_pts.item(), Fraction(video_timebase_num.item(), video_timebase_den.item()), - Fraction(video_timebase_av.numerator, video_timebase_av.denominator), + Fraction( + video_timebase_av.numerator, video_timebase_av.denominator + ), math.ceil, ) if audio_timebase_av: audio_start_pts = _pts_convert( video_start_pts.item(), Fraction(video_timebase_num.item(), video_timebase_den.item()), - Fraction(audio_timebase_av.numerator, audio_timebase_av.denominator), + Fraction( + audio_timebase_av.numerator, audio_timebase_av.denominator + ), math.floor, ) audio_end_pts = _pts_convert( video_end_pts.item(), Fraction(video_timebase_num.item(), video_timebase_den.item()), - Fraction(audio_timebase_av.numerator, audio_timebase_av.denominator), + Fraction( + audio_timebase_av.numerator, audio_timebase_av.denominator + ), math.ceil, ) @@ -1044,6 +1191,56 @@ def test_probe_video_from_memory(self): probe_result = torch.ops.video_reader.probe_video_from_memory(video_tensor) self.check_probe_result(probe_result, config) + def test_probe_video_from_memory_script(self): + scripted_fun = torch.jit.script(io._probe_video_from_memory) + self.assertIsNotNone(scripted_fun) + + for test_video, config in test_videos.items(): + full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video) + probe_result = scripted_fun(video_tensor) + self.check_meta_result(probe_result, config) + + @PY39_SKIP + def test_read_video_from_memory_scripted(self): + """ + Test the case when video is already in memory, and decoder reads data in memory + """ + # video related + width, height, min_dimension, max_dimension = 0, 0, 0, 0 + video_start_pts, video_end_pts = 0, -1 + video_timebase_num, video_timebase_den = 0, 1 + # audio related + samples, channels = 0, 0 + audio_start_pts, audio_end_pts = 0, -1 + audio_timebase_num, audio_timebase_den = 0, 1 + + scripted_fun = torch.jit.script(io._read_video_from_memory) + self.assertIsNotNone(scripted_fun) + + for test_video, _config in test_videos.items(): + full_path, video_tensor = _get_video_tensor(VIDEO_DIR, test_video) + + # decode all frames using cpp decoder + scripted_fun( + video_tensor, + seek_frame_margin, + 1, # readVideoStream + width, + height, + min_dimension, + max_dimension, + [video_start_pts, video_end_pts], + video_timebase_num, + video_timebase_den, + 1, # readAudioStream + samples, + channels, + [audio_start_pts, audio_end_pts], + audio_timebase_num, + audio_timebase_den, + ) + # FUTURE: check value of video / audio frames + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/test/test_videoapi.py b/test/test_videoapi.py new file mode 100644 index 00000000000..da73c7cd17d --- /dev/null +++ b/test/test_videoapi.py @@ -0,0 +1,200 @@ +import collections +import os +import unittest + +import torch +import torchvision +from torchvision.io import _HAS_VIDEO_OPT, VideoReader +from torchvision.datasets.utils import download_url + +from common_utils import PY39_SKIP + +try: + import av + + # Do a version test too + torchvision.io.video._check_av_available() +except ImportError: + av = None + + +VIDEO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "videos") + +CheckerConfig = ["duration", "video_fps", "audio_sample_rate"] +GroundTruth = collections.namedtuple("GroundTruth", " ".join(CheckerConfig)) + + +def fate(name, path="."): + """Download and return a path to a sample from the FFmpeg test suite. + See the `FFmpeg Automated Test Environment `_ + """ + + file_name = name.split("/")[1] + download_url("http://fate.ffmpeg.org/fate-suite/" + name, path, file_name) + return os.path.join(path, file_name) + + +test_videos = { + "RATRACE_wave_f_nm_np1_fr_goo_37.avi": GroundTruth( + duration=2.0, video_fps=30.0, audio_sample_rate=None + ), + "SchoolRulesHowTheyHelpUs_wave_f_nm_np1_ba_med_0.avi": GroundTruth( + duration=2.0, video_fps=30.0, audio_sample_rate=None + ), + "TrumanShow_wave_f_nm_np1_fr_med_26.avi": GroundTruth( + duration=2.0, video_fps=30.0, audio_sample_rate=None + ), + "v_SoccerJuggling_g23_c01.avi": GroundTruth( + duration=8.0, video_fps=29.97, audio_sample_rate=None + ), + "v_SoccerJuggling_g24_c01.avi": GroundTruth( + duration=8.0, video_fps=29.97, audio_sample_rate=None + ), + "R6llTwEh07w.mp4": GroundTruth( + duration=10.0, video_fps=30.0, audio_sample_rate=44100 + ), + "SOX5yA1l24A.mp4": GroundTruth( + duration=11.0, video_fps=29.97, audio_sample_rate=48000 + ), + "WUzgd7C1pWA.mp4": GroundTruth( + duration=11.0, video_fps=29.97, audio_sample_rate=48000 + ), +} + + +@unittest.skipIf(_HAS_VIDEO_OPT is False, "Didn't compile with ffmpeg") +@PY39_SKIP +class TestVideoApi(unittest.TestCase): + @unittest.skipIf(av is None, "PyAV unavailable") + def test_frame_reading(self): + for test_video, config in test_videos.items(): + full_path = os.path.join(VIDEO_DIR, test_video) + + av_reader = av.open(full_path) + + if av_reader.streams.video: + video_reader = VideoReader(full_path, "video") + for av_frame in av_reader.decode(av_reader.streams.video[0]): + vr_frame = next(video_reader) + + self.assertAlmostEqual( + float(av_frame.pts * av_frame.time_base), + vr_frame["pts"], + delta=0.1, + ) + + av_array = torch.tensor(av_frame.to_rgb().to_ndarray()).permute( + 2, 0, 1 + ) + vr_array = vr_frame["data"] + mean_delta = torch.mean( + torch.abs(av_array.float() - vr_array.float()) + ) + # on average the difference is very small and caused + # by decoding (around 1%) + # TODO: asses empirically how to set this? atm it's 1% + # averaged over all frames + self.assertTrue(mean_delta.item() < 2.5) + + av_reader = av.open(full_path) + if av_reader.streams.audio: + video_reader = VideoReader(full_path, "audio") + for av_frame in av_reader.decode(av_reader.streams.audio[0]): + vr_frame = next(video_reader) + self.assertAlmostEqual( + float(av_frame.pts * av_frame.time_base), + vr_frame["pts"], + delta=0.1, + ) + + av_array = torch.tensor(av_frame.to_ndarray()).permute(1, 0) + vr_array = vr_frame["data"] + + max_delta = torch.max( + torch.abs(av_array.float() - vr_array.float()) + ) + # we assure that there is never more than 1% difference in signal + self.assertTrue(max_delta.item() < 0.001) + + def test_metadata(self): + """ + Test that the metadata returned via pyav corresponds to the one returned + by the new video decoder API + """ + for test_video, config in test_videos.items(): + full_path = os.path.join(VIDEO_DIR, test_video) + reader = VideoReader(full_path, "video") + reader_md = reader.get_metadata() + self.assertAlmostEqual( + config.video_fps, reader_md["video"]["fps"][0], delta=0.0001 + ) + self.assertAlmostEqual( + config.duration, reader_md["video"]["duration"][0], delta=0.5 + ) + + def test_seek_start(self): + for test_video, config in test_videos.items(): + full_path = os.path.join(VIDEO_DIR, test_video) + + video_reader = VideoReader(full_path, "video") + num_frames = 0 + for frame in video_reader: + num_frames += 1 + + # now seek the container to 0 and do it again + # It's often that starting seek can be inprecise + # this way and it doesn't start at 0 + video_reader.seek(0) + start_num_frames = 0 + for frame in video_reader: + start_num_frames += 1 + + self.assertEqual(start_num_frames, num_frames) + + # now seek the container to < 0 to check for unexpected behaviour + video_reader.seek(-1) + start_num_frames = 0 + for frame in video_reader: + start_num_frames += 1 + + self.assertEqual(start_num_frames, num_frames) + + def test_accurateseek_middle(self): + for test_video, config in test_videos.items(): + full_path = os.path.join(VIDEO_DIR, test_video) + + stream = "video" + video_reader = VideoReader(full_path, stream) + md = video_reader.get_metadata() + duration = md[stream]["duration"][0] + if duration is not None: + + num_frames = 0 + for frame in video_reader: + num_frames += 1 + + video_reader.seek(duration / 2) + middle_num_frames = 0 + for frame in video_reader: + middle_num_frames += 1 + + self.assertTrue(middle_num_frames < num_frames) + self.assertAlmostEqual(middle_num_frames, num_frames // 2, delta=1) + + video_reader.seek(duration / 2) + frame = next(video_reader) + lb = duration / 2 - 1 / md[stream]["fps"][0] + ub = duration / 2 + 1 / md[stream]["fps"][0] + self.assertTrue((lb <= frame["pts"]) & (ub >= frame["pts"])) + + def test_fate_suite(self): + video_path = fate("sub/MovText_capability_tester.mp4", VIDEO_DIR) + vr = VideoReader(video_path) + metadata = vr.get_metadata() + + self.assertTrue(metadata["subtitles"]["duration"] is not None) + os.remove(video_path) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/tracing/frcnn/CMakeLists.txt b/test/tracing/frcnn/CMakeLists.txt new file mode 100644 index 00000000000..c79382470bd --- /dev/null +++ b/test/tracing/frcnn/CMakeLists.txt @@ -0,0 +1,13 @@ +cmake_minimum_required(VERSION 3.1 FATAL_ERROR) +project(test_frcnn_tracing) + +find_package(Torch REQUIRED) +find_package(TorchVision REQUIRED) + +# This due to some headers importing Python.h +find_package(Python3 COMPONENTS Development) + +add_executable(test_frcnn_tracing test_frcnn_tracing.cpp) +target_compile_features(test_frcnn_tracing PUBLIC cxx_range_for) +target_link_libraries(test_frcnn_tracing ${TORCH_LIBRARIES} TorchVision::TorchVision Python3::Python) +set_property(TARGET test_frcnn_tracing PROPERTY CXX_STANDARD 14) diff --git a/test/tracing/frcnn/test_frcnn_tracing.cpp b/test/tracing/frcnn/test_frcnn_tracing.cpp new file mode 100644 index 00000000000..f5f350b6b02 --- /dev/null +++ b/test/tracing/frcnn/test_frcnn_tracing.cpp @@ -0,0 +1,58 @@ +#include +#include +#include +#include + + +int main() { + torch::DeviceType device_type; + device_type = torch::kCPU; + + torch::jit::script::Module module; + try { + std::cout << "Loading model\n"; + // Deserialize the ScriptModule from a file using torch::jit::load(). + module = torch::jit::load("fasterrcnn_resnet50_fpn.pt"); + std::cout << "Model loaded\n"; + } catch (const torch::Error& e) { + std::cout << "error loading the model\n"; + return -1; + } catch (const std::exception& e) { + std::cout << "Other error: " << e.what() << "\n"; + return -1; + } + + // TorchScript models require a List[IValue] as input + std::vector inputs; + + // Faster RCNN accepts a List[Tensor] as main input + std::vector images; + images.push_back(torch::rand({3, 256, 275})); + images.push_back(torch::rand({3, 256, 275})); + + inputs.push_back(images); + auto output = module.forward(inputs); + + std::cout << "ok\n"; + std::cout << "output" << output << "\n"; + + if (torch::cuda::is_available()) { + // Move traced model to GPU + module.to(torch::kCUDA); + + // Add GPU inputs + images.clear(); + inputs.clear(); + + torch::TensorOptions options = torch::TensorOptions{torch::kCUDA}; + images.push_back(torch::rand({3, 256, 275}, options)); + images.push_back(torch::rand({3, 256, 275}, options)); + + inputs.push_back(images); + auto output = module.forward(inputs); + + std::cout << "ok\n"; + std::cout << "output" << output << "\n"; + } + return 0; +} diff --git a/test/tracing/frcnn/trace_model.py b/test/tracing/frcnn/trace_model.py new file mode 100644 index 00000000000..34961e8684f --- /dev/null +++ b/test/tracing/frcnn/trace_model.py @@ -0,0 +1,14 @@ + +import os.path as osp + +import torch +import torchvision + +HERE = osp.dirname(osp.abspath(__file__)) +ASSETS = osp.dirname(osp.dirname(HERE)) + +model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False) +model.eval() + +traced_model = torch.jit.script(model) +traced_model.save("fasterrcnn_resnet50_fpn.pt") diff --git a/torchvision/__init__.py b/torchvision/__init__.py index ca155712671..9508605b551 100644 --- a/torchvision/__init__.py +++ b/torchvision/__init__.py @@ -1,4 +1,7 @@ import warnings +import os + +from .extension import _HAS_OPS from torchvision import models from torchvision import datasets @@ -7,13 +10,21 @@ from torchvision import utils from torchvision import io -from .extension import _HAS_OPS +import torch try: from .version import __version__ # noqa: F401 except ImportError: pass +# Check if torchvision is being imported within the root folder +if (not _HAS_OPS and os.path.dirname(os.path.realpath(__file__)) == + os.path.join(os.path.realpath(os.getcwd()), 'torchvision')): + message = ('You are importing torchvision within its own root folder ({}). ' + 'This is not expected to work and may give errors. Please exit the ' + 'torchvision project source and relaunch your python interpreter.') + warnings.warn(message.format(os.getcwd())) + _image_backend = 'PIL' _video_backend = "pyav" @@ -49,10 +60,14 @@ def set_video_backend(backend): Args: backend (string): Name of the video backend. one of {'pyav', 'video_reader'}. The :mod:`pyav` package uses the 3rd party PyAv library. It is a Pythonic - binding for the FFmpeg libraries. - The :mod:`video_reader` package includes a native c++ implementation on - top of FFMPEG libraries, and a python API of TorchScript custom operator. - It is generally decoding faster than pyav, but perhaps is less robust. + binding for the FFmpeg libraries. + The :mod:`video_reader` package includes a native C++ implementation on + top of FFMPEG libraries, and a python API of TorchScript custom operator. + It is generally decoding faster than :mod:`pyav`, but perhaps is less robust. + + .. note:: + Building with FFMPEG is disabled by default in the latest master. If you want to use the 'video_reader' + backend, please compile torchvision from source. """ global _video_backend if backend not in ["pyav", "video_reader"]: @@ -60,15 +75,25 @@ def set_video_backend(backend): "Invalid video backend '%s'. Options are 'pyav' and 'video_reader'" % backend ) if backend == "video_reader" and not io._HAS_VIDEO_OPT: - warnings.warn("video_reader video backend is not available") + message = ( + "video_reader video backend is not available." + " Please compile torchvision from source and try again" + ) + warnings.warn(message) else: _video_backend = backend def get_video_backend(): + """ + Returns the currently active video backend used to decode videos. + + Returns: + str: Name of the video backend. one of {'pyav', 'video_reader'}. + """ + return _video_backend def _is_tracing(): - import torch return torch._C._get_tracing_state() diff --git a/torchvision/csrc/PSROIAlign.h b/torchvision/csrc/PSROIAlign.h deleted file mode 100644 index a5998df2891..00000000000 --- a/torchvision/csrc/PSROIAlign.h +++ /dev/null @@ -1,150 +0,0 @@ -#pragma once - -#include "cpu/vision_cpu.h" - -#ifdef WITH_CUDA -#include "cuda/vision_cuda.h" -#endif - -#include - -std::tuple PSROIAlign_forward( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio) { - if (input.type().is_cuda()) { -#ifdef WITH_CUDA - return PSROIAlign_forward_cuda( - input, - rois, - spatial_scale, - pooled_height, - pooled_width, - sampling_ratio); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - return PSROIAlign_forward_cpu( - input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); -} - -at::Tensor PSROIAlign_backward( - const at::Tensor& grad, - const at::Tensor& rois, - const at::Tensor& mapping_channel, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - const int batch_size, - const int channels, - const int height, - const int width) { - if (grad.type().is_cuda()) { -#ifdef WITH_CUDA - return PSROIAlign_backward_cuda( - grad, - rois, - mapping_channel, - spatial_scale, - pooled_height, - pooled_width, - sampling_ratio, - batch_size, - channels, - height, - width); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - return PSROIAlign_backward_cpu( - grad, - rois, - mapping_channel, - spatial_scale, - pooled_height, - pooled_width, - sampling_ratio, - batch_size, - channels, - height, - width); -} - -using namespace at; -using torch::Tensor; -using torch::autograd::AutogradContext; -using torch::autograd::Variable; -using torch::autograd::variable_list; - -class PSROIAlignFunction - : public torch::autograd::Function { - public: - static variable_list forward( - AutogradContext* ctx, - Variable input, - Variable rois, - const double spatial_scale, - const int64_t pooled_height, - const int64_t pooled_width, - const int64_t sampling_ratio) { - ctx->saved_data["spatial_scale"] = spatial_scale; - ctx->saved_data["pooled_height"] = pooled_height; - ctx->saved_data["pooled_width"] = pooled_width; - ctx->saved_data["sampling_ratio"] = sampling_ratio; - ctx->saved_data["input_shape"] = input.sizes(); - auto result = PSROIAlign_forward( - input, - rois, - spatial_scale, - pooled_height, - pooled_width, - sampling_ratio); - auto output = std::get<0>(result); - auto channel_mapping = std::get<1>(result); - ctx->save_for_backward({rois, channel_mapping}); - ctx->mark_non_differentiable({channel_mapping}); - return {output, channel_mapping}; - } - - static variable_list backward( - AutogradContext* ctx, - variable_list grad_output) { - // Use data saved in forward - auto saved = ctx->get_saved_variables(); - auto rois = saved[0]; - auto channel_mapping = saved[1]; - auto input_shape = ctx->saved_data["input_shape"].toIntList(); - auto grad_in = PSROIAlign_backward( - grad_output[0], - rois, - channel_mapping, - ctx->saved_data["spatial_scale"].toDouble(), - ctx->saved_data["pooled_height"].toInt(), - ctx->saved_data["pooled_width"].toInt(), - ctx->saved_data["sampling_ratio"].toInt(), - input_shape[0], - input_shape[1], - input_shape[2], - input_shape[3]); - return { - grad_in, Variable(), Variable(), Variable(), Variable(), Variable()}; - } -}; - -std::tuple ps_roi_align( - const Tensor& input, - const Tensor& rois, - const double spatial_scale, - const int64_t pooled_height, - const int64_t pooled_width, - const int64_t sampling_ratio) { - auto result = PSROIAlignFunction::apply( - input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); - return std::tuple(result[0], result[1]); -} diff --git a/torchvision/csrc/PSROIPool.h b/torchvision/csrc/PSROIPool.h deleted file mode 100644 index c67ce92f54e..00000000000 --- a/torchvision/csrc/PSROIPool.h +++ /dev/null @@ -1,128 +0,0 @@ -#pragma once - -#include "cpu/vision_cpu.h" - -#ifdef WITH_CUDA -#include "cuda/vision_cuda.h" -#endif - -std::tuple PSROIPool_forward( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width) { - if (input.type().is_cuda()) { -#ifdef WITH_CUDA - return PSROIPool_forward_cuda( - input, rois, spatial_scale, pooled_height, pooled_width); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - return PSROIPool_forward_cpu( - input, rois, spatial_scale, pooled_height, pooled_width); -} - -at::Tensor PSROIPool_backward( - const at::Tensor& grad, - const at::Tensor& rois, - const at::Tensor& mapping_channel, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width) { - if (grad.type().is_cuda()) { -#ifdef WITH_CUDA - return PSROIPool_backward_cuda( - grad, - rois, - mapping_channel, - spatial_scale, - pooled_height, - pooled_width, - batch_size, - channels, - height, - width); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - return PSROIPool_backward_cpu( - grad, - rois, - mapping_channel, - spatial_scale, - pooled_height, - pooled_width, - batch_size, - channels, - height, - width); -} - -using namespace at; -using torch::Tensor; -using torch::autograd::AutogradContext; -using torch::autograd::Variable; -using torch::autograd::variable_list; - -class PSROIPoolFunction : public torch::autograd::Function { - public: - static variable_list forward( - AutogradContext* ctx, - Variable input, - Variable rois, - const double spatial_scale, - const int64_t pooled_height, - const int64_t pooled_width) { - ctx->saved_data["spatial_scale"] = spatial_scale; - ctx->saved_data["pooled_height"] = pooled_height; - ctx->saved_data["pooled_width"] = pooled_width; - ctx->saved_data["input_shape"] = input.sizes(); - auto result = PSROIPool_forward( - input, rois, spatial_scale, pooled_height, pooled_width); - auto output = std::get<0>(result); - auto channel_mapping = std::get<1>(result); - ctx->save_for_backward({rois, channel_mapping}); - ctx->mark_non_differentiable({channel_mapping}); - return {output, channel_mapping}; - } - - static variable_list backward( - AutogradContext* ctx, - variable_list grad_output) { - // Use data saved in forward - auto saved = ctx->get_saved_variables(); - auto rois = saved[0]; - auto channel_mapping = saved[1]; - auto input_shape = ctx->saved_data["input_shape"].toIntList(); - auto grad_in = PSROIPool_backward( - grad_output[0], - rois, - channel_mapping, - ctx->saved_data["spatial_scale"].toDouble(), - ctx->saved_data["pooled_height"].toInt(), - ctx->saved_data["pooled_width"].toInt(), - input_shape[0], - input_shape[1], - input_shape[2], - input_shape[3]); - return {grad_in, Variable(), Variable(), Variable(), Variable()}; - } -}; - -std::tuple ps_roi_pool( - const Tensor& input, - const Tensor& rois, - const double spatial_scale, - const int64_t pooled_height, - const int64_t pooled_width) { - auto result = PSROIPoolFunction::apply( - input, rois, spatial_scale, pooled_height, pooled_width); - return std::tuple(result[0], result[1]); -} diff --git a/torchvision/csrc/ROIAlign.h b/torchvision/csrc/ROIAlign.h deleted file mode 100644 index 765d4879d99..00000000000 --- a/torchvision/csrc/ROIAlign.h +++ /dev/null @@ -1,147 +0,0 @@ -#pragma once - -#include "cpu/vision_cpu.h" - -#ifdef WITH_CUDA -#include "cuda/vision_cuda.h" -#endif - -// Interface for Python -at::Tensor ROIAlign_forward( - const at::Tensor& input, // Input feature map. - const at::Tensor& rois, // List of ROIs to pool over. - const double spatial_scale, // The scale of the image features. ROIs will be - // scaled to this. - const int64_t pooled_height, // The height of the pooled feature map. - const int64_t pooled_width, // The width of the pooled feature - const int64_t sampling_ratio) // The number of points to sample in each bin -// along each axis. -{ - if (input.type().is_cuda()) { -#ifdef WITH_CUDA - return ROIAlign_forward_cuda( - input, - rois, - spatial_scale, - pooled_height, - pooled_width, - sampling_ratio); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - return ROIAlign_forward_cpu( - input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); -} - -at::Tensor ROIAlign_backward( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio) { - if (grad.type().is_cuda()) { -#ifdef WITH_CUDA - return ROIAlign_backward_cuda( - grad, - rois, - spatial_scale, - pooled_height, - pooled_width, - batch_size, - channels, - height, - width, - sampling_ratio); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - return ROIAlign_backward_cpu( - grad, - rois, - spatial_scale, - pooled_height, - pooled_width, - batch_size, - channels, - height, - width, - sampling_ratio); -} - -using namespace at; -using torch::Tensor; -using torch::autograd::AutogradContext; -using torch::autograd::Variable; -using torch::autograd::variable_list; - -class ROIAlignFunction : public torch::autograd::Function { - public: - static variable_list forward( - AutogradContext* ctx, - Variable input, - Variable rois, - const double spatial_scale, - const int64_t pooled_height, - const int64_t pooled_width, - const int64_t sampling_ratio) { - ctx->saved_data["spatial_scale"] = spatial_scale; - ctx->saved_data["pooled_height"] = pooled_height; - ctx->saved_data["pooled_width"] = pooled_width; - ctx->saved_data["sampling_ratio"] = sampling_ratio; - ctx->saved_data["input_shape"] = input.sizes(); - ctx->save_for_backward({rois}); - auto result = ROIAlign_forward( - input, - rois, - spatial_scale, - pooled_height, - pooled_width, - sampling_ratio); - return {result}; - } - - static variable_list backward( - AutogradContext* ctx, - variable_list grad_output) { - // Use data saved in forward - auto saved = ctx->get_saved_variables(); - auto rois = saved[0]; - auto input_shape = ctx->saved_data["input_shape"].toIntList(); - auto grad_in = ROIAlign_backward( - grad_output[0], - rois, - ctx->saved_data["spatial_scale"].toDouble(), - ctx->saved_data["pooled_height"].toInt(), - ctx->saved_data["pooled_width"].toInt(), - input_shape[0], - input_shape[1], - input_shape[2], - input_shape[3], - ctx->saved_data["sampling_ratio"].toInt()); - return { - grad_in, Variable(), Variable(), Variable(), Variable(), Variable()}; - } -}; - -Tensor roi_align( - const Tensor& input, - const Tensor& rois, - const double spatial_scale, - const int64_t pooled_height, - const int64_t pooled_width, - const int64_t sampling_ratio) { - return ROIAlignFunction::apply( - input, - rois, - spatial_scale, - pooled_height, - pooled_width, - sampling_ratio)[0]; -} diff --git a/torchvision/csrc/ROIPool.h b/torchvision/csrc/ROIPool.h deleted file mode 100644 index 79b40293176..00000000000 --- a/torchvision/csrc/ROIPool.h +++ /dev/null @@ -1,128 +0,0 @@ -#pragma once - -#include "cpu/vision_cpu.h" - -#ifdef WITH_CUDA -#include "cuda/vision_cuda.h" -#endif - -std::tuple ROIPool_forward( - const at::Tensor& input, - const at::Tensor& rois, - const double spatial_scale, - const int64_t pooled_height, - const int64_t pooled_width) { - if (input.type().is_cuda()) { -#ifdef WITH_CUDA - return ROIPool_forward_cuda( - input, rois, spatial_scale, pooled_height, pooled_width); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - return ROIPool_forward_cpu( - input, rois, spatial_scale, pooled_height, pooled_width); -} - -at::Tensor ROIPool_backward( - const at::Tensor& grad, - const at::Tensor& rois, - const at::Tensor& argmax, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width) { - if (grad.type().is_cuda()) { -#ifdef WITH_CUDA - return ROIPool_backward_cuda( - grad, - rois, - argmax, - spatial_scale, - pooled_height, - pooled_width, - batch_size, - channels, - height, - width); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - return ROIPool_backward_cpu( - grad, - rois, - argmax, - spatial_scale, - pooled_height, - pooled_width, - batch_size, - channels, - height, - width); -} - -using namespace at; -using torch::Tensor; -using torch::autograd::AutogradContext; -using torch::autograd::Variable; -using torch::autograd::variable_list; - -class ROIPoolFunction : public torch::autograd::Function { - public: - static variable_list forward( - AutogradContext* ctx, - Variable input, - Variable rois, - const double spatial_scale, - const int64_t pooled_height, - const int64_t pooled_width) { - ctx->saved_data["spatial_scale"] = spatial_scale; - ctx->saved_data["pooled_height"] = pooled_height; - ctx->saved_data["pooled_width"] = pooled_width; - ctx->saved_data["input_shape"] = input.sizes(); - auto result = ROIPool_forward( - input, rois, spatial_scale, pooled_height, pooled_width); - auto output = std::get<0>(result); - auto argmax = std::get<1>(result); - ctx->save_for_backward({rois, argmax}); - ctx->mark_non_differentiable({argmax}); - return {output, argmax}; - } - - static variable_list backward( - AutogradContext* ctx, - variable_list grad_output) { - // Use data saved in forward - auto saved = ctx->get_saved_variables(); - auto rois = saved[0]; - auto argmax = saved[1]; - auto input_shape = ctx->saved_data["input_shape"].toIntList(); - auto grad_in = ROIPool_backward( - grad_output[0], - rois, - argmax, - ctx->saved_data["spatial_scale"].toDouble(), - ctx->saved_data["pooled_height"].toInt(), - ctx->saved_data["pooled_width"].toInt(), - input_shape[0], - input_shape[1], - input_shape[2], - input_shape[3]); - return {grad_in, Variable(), Variable(), Variable(), Variable()}; - } -}; - -std::tuple roi_pool( - const Tensor& input, - const Tensor& rois, - const double spatial_scale, - const int64_t pooled_height, - const int64_t pooled_width) { - auto result = ROIPoolFunction::apply( - input, rois, spatial_scale, pooled_height, pooled_width); - return std::tuple(result[0], result[1]); -} diff --git a/torchvision/csrc/cpu/video_reader/FfmpegAudioSampler.cpp b/torchvision/csrc/cpu/video_reader/FfmpegAudioSampler.cpp deleted file mode 100644 index 24aecacf946..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegAudioSampler.cpp +++ /dev/null @@ -1,118 +0,0 @@ -#include "FfmpegAudioSampler.h" -#include -#include "FfmpegUtil.h" - -using namespace std; - -FfmpegAudioSampler::FfmpegAudioSampler( - const AudioFormat& in, - const AudioFormat& out) - : inFormat_(in), outFormat_(out) {} - -FfmpegAudioSampler::~FfmpegAudioSampler() { - if (swrContext_) { - swr_free(&swrContext_); - } -} - -int FfmpegAudioSampler::init() { - swrContext_ = swr_alloc_set_opts( - nullptr, // we're allocating a new context - av_get_default_channel_layout(outFormat_.channels), // out_ch_layout - static_cast(outFormat_.format), // out_sample_fmt - outFormat_.samples, // out_sample_rate - av_get_default_channel_layout(inFormat_.channels), // in_ch_layout - static_cast(inFormat_.format), // in_sample_fmt - inFormat_.samples, // in_sample_rate - 0, // log_offset - nullptr); // log_ctx - if (swrContext_ == nullptr) { - LOG(ERROR) << "swr_alloc_set_opts fails"; - return -1; - } - int result = 0; - if ((result = swr_init(swrContext_)) < 0) { - LOG(ERROR) << "swr_init failed, err: " << ffmpeg_util::getErrorDesc(result) - << ", in -> format: " << inFormat_.format - << ", channels: " << inFormat_.channels - << ", samples: " << inFormat_.samples - << ", out -> format: " << outFormat_.format - << ", channels: " << outFormat_.channels - << ", samples: " << outFormat_.samples; - return -1; - } - return 0; -} - -int64_t FfmpegAudioSampler::getSampleBytes(const AVFrame* frame) const { - auto outSamples = getOutNumSamples(frame->nb_samples); - - return av_samples_get_buffer_size( - nullptr, - outFormat_.channels, - outSamples, - static_cast(outFormat_.format), - 1); -} - -// https://www.ffmpeg.org/doxygen/3.2/group__lswr.html -unique_ptr FfmpegAudioSampler::sample(const AVFrame* frame) { - if (!frame) { - return nullptr; // no flush for videos - } - - auto inNumSamples = frame->nb_samples; - auto outNumSamples = getOutNumSamples(frame->nb_samples); - - auto outSampleSize = getSampleBytes(frame); - AvDataPtr frameData(static_cast(av_malloc(outSampleSize))); - - uint8_t* outPlanes[AVRESAMPLE_MAX_CHANNELS]; - int result = 0; - if ((result = av_samples_fill_arrays( - outPlanes, - nullptr, // linesize is not needed - frameData.get(), - outFormat_.channels, - outNumSamples, - static_cast(outFormat_.format), - 1)) < 0) { - LOG(ERROR) << "av_samples_fill_arrays failed, err: " - << ffmpeg_util::getErrorDesc(result) - << ", outNumSamples: " << outNumSamples - << ", format: " << outFormat_.format; - return nullptr; - } - - if ((result = swr_convert( - swrContext_, - &outPlanes[0], - outNumSamples, - (const uint8_t**)&frame->data[0], - inNumSamples)) < 0) { - LOG(ERROR) << "swr_convert faield, err: " - << ffmpeg_util::getErrorDesc(result); - return nullptr; - } - // result returned by swr_convert is the No. of actual output samples. - // So update the buffer size using av_samples_get_buffer_size - result = av_samples_get_buffer_size( - nullptr, - outFormat_.channels, - result, - static_cast(outFormat_.format), - 1); - - return make_unique(std::move(frameData), result, 0); -} -/* -Because of decoding delay, the returned value is an upper bound of No. of -output samples -*/ -int64_t FfmpegAudioSampler::getOutNumSamples(int inNumSamples) const { - return av_rescale_rnd( - swr_get_delay(swrContext_, inFormat_.samples) + inNumSamples, - outFormat_.samples, - inFormat_.samples, - AV_ROUND_UP); -} diff --git a/torchvision/csrc/cpu/video_reader/FfmpegAudioSampler.h b/torchvision/csrc/cpu/video_reader/FfmpegAudioSampler.h deleted file mode 100644 index 767a5ca6e4f..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegAudioSampler.h +++ /dev/null @@ -1,32 +0,0 @@ -#pragma once - -#include "FfmpegSampler.h" - -#define AVRESAMPLE_MAX_CHANNELS 32 - -/** - * Class transcode audio frames from one format into another - */ -class FfmpegAudioSampler : public FfmpegSampler { - public: - explicit FfmpegAudioSampler(const AudioFormat& in, const AudioFormat& out); - ~FfmpegAudioSampler() override; - - int init() override; - - int64_t getSampleBytes(const AVFrame* frame) const; - // FfmpegSampler overrides - // returns number of bytes of the sampled data - std::unique_ptr sample(const AVFrame* frame) override; - - const AudioFormat& getInFormat() const { - return inFormat_; - } - - private: - int64_t getOutNumSamples(int inNumSamples) const; - - AudioFormat inFormat_; - AudioFormat outFormat_; - SwrContext* swrContext_{nullptr}; -}; diff --git a/torchvision/csrc/cpu/video_reader/FfmpegAudioStream.cpp b/torchvision/csrc/cpu/video_reader/FfmpegAudioStream.cpp deleted file mode 100644 index b5b1e2fbda5..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegAudioStream.cpp +++ /dev/null @@ -1,103 +0,0 @@ -#include "FfmpegAudioStream.h" -#include "FfmpegUtil.h" - -using namespace std; - -namespace { - -bool operator==(const AudioFormat& x, const AVCodecContext& y) { - return x.samples == y.sample_rate && x.channels == y.channels && - x.format == y.sample_fmt; -} - -AudioFormat& toAudioFormat( - AudioFormat& audioFormat, - const AVCodecContext& codecCtx) { - audioFormat.samples = codecCtx.sample_rate; - audioFormat.channels = codecCtx.channels; - audioFormat.format = codecCtx.sample_fmt; - - return audioFormat; -} - -} // namespace - -FfmpegAudioStream::FfmpegAudioStream( - AVFormatContext* inputCtx, - int index, - enum AVMediaType avMediaType, - MediaFormat mediaFormat, - double seekFrameMargin) - : FfmpegStream(inputCtx, index, avMediaType, seekFrameMargin), - mediaFormat_(mediaFormat) {} - -FfmpegAudioStream::~FfmpegAudioStream() {} - -void FfmpegAudioStream::checkStreamDecodeParams() { - auto timeBase = getTimeBase(); - if (timeBase.first > 0) { - CHECK_EQ(timeBase.first, inputCtx_->streams[index_]->time_base.num); - CHECK_EQ(timeBase.second, inputCtx_->streams[index_]->time_base.den); - } -} - -void FfmpegAudioStream::updateStreamDecodeParams() { - auto timeBase = getTimeBase(); - if (timeBase.first == 0) { - mediaFormat_.format.audio.timeBaseNum = - inputCtx_->streams[index_]->time_base.num; - mediaFormat_.format.audio.timeBaseDen = - inputCtx_->streams[index_]->time_base.den; - } - mediaFormat_.format.audio.duration = inputCtx_->streams[index_]->duration; -} - -int FfmpegAudioStream::initFormat() { - AudioFormat& format = mediaFormat_.format.audio; - - if (format.samples == 0) { - format.samples = codecCtx_->sample_rate; - } - if (format.channels == 0) { - format.channels = codecCtx_->channels; - } - if (format.format == AV_SAMPLE_FMT_NONE) { - format.format = codecCtx_->sample_fmt; - VLOG(2) << "set stream format sample_fmt: " << format.format; - } - - checkStreamDecodeParams(); - - updateStreamDecodeParams(); - - if (format.samples > 0 && format.channels > 0 && - format.format != AV_SAMPLE_FMT_NONE) { - return 0; - } else { - return -1; - } -} - -unique_ptr FfmpegAudioStream::sampleFrameData() { - AudioFormat& audioFormat = mediaFormat_.format.audio; - - if (!sampler_ || !(sampler_->getInFormat() == *codecCtx_)) { - AudioFormat newInFormat; - newInFormat = toAudioFormat(newInFormat, *codecCtx_); - sampler_ = make_unique(newInFormat, audioFormat); - VLOG(1) << "Set sampler input audio format" - << ", samples: " << newInFormat.samples - << ", channels: " << newInFormat.channels - << ", format: " << newInFormat.format - << " : output audio sampler format" - << ", samples: " << audioFormat.samples - << ", channels: " << audioFormat.channels - << ", format: " << audioFormat.format; - int ret = sampler_->init(); - if (ret < 0) { - VLOG(1) << "Fail to initialize audio sampler"; - return nullptr; - } - } - return sampler_->sample(frame_); -} diff --git a/torchvision/csrc/cpu/video_reader/FfmpegAudioStream.h b/torchvision/csrc/cpu/video_reader/FfmpegAudioStream.h deleted file mode 100644 index 1d4f7a2f2ee..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegAudioStream.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once - -#include -#include "FfmpegAudioSampler.h" -#include "FfmpegStream.h" - -/** - * Class uses FFMPEG library to decode one video stream. - */ -class FfmpegAudioStream : public FfmpegStream { - public: - explicit FfmpegAudioStream( - AVFormatContext* inputCtx, - int index, - enum AVMediaType avMediaType, - MediaFormat mediaFormat, - double seekFrameMargin); - - ~FfmpegAudioStream() override; - - // FfmpegStream overrides - MediaType getMediaType() const override { - return MediaType::TYPE_AUDIO; - } - - FormatUnion getMediaFormat() const override { - return mediaFormat_.format; - } - - int64_t getStartPts() const override { - return mediaFormat_.format.audio.startPts; - } - int64_t getEndPts() const override { - return mediaFormat_.format.audio.endPts; - } - // return numerator and denominator of time base - std::pair getTimeBase() const { - return std::make_pair( - mediaFormat_.format.audio.timeBaseNum, - mediaFormat_.format.audio.timeBaseDen); - } - - void checkStreamDecodeParams(); - - void updateStreamDecodeParams(); - - protected: - int initFormat() override; - std::unique_ptr sampleFrameData() override; - - private: - MediaFormat mediaFormat_; - std::unique_ptr sampler_{nullptr}; -}; diff --git a/torchvision/csrc/cpu/video_reader/FfmpegDecoder.cpp b/torchvision/csrc/cpu/video_reader/FfmpegDecoder.cpp deleted file mode 100644 index fb4d302cc03..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegDecoder.cpp +++ /dev/null @@ -1,412 +0,0 @@ -#include "FfmpegDecoder.h" -#include "FfmpegAudioStream.h" -#include "FfmpegUtil.h" -#include "FfmpegVideoStream.h" - -using namespace std; - -static AVPacket avPkt; - -namespace { - -unique_ptr createFfmpegStream( - MediaType type, - AVFormatContext* ctx, - int idx, - MediaFormat& mediaFormat, - double seekFrameMargin) { - enum AVMediaType avType; - CHECK(ffmpeg_util::mapMediaType(type, &avType)); - switch (type) { - case MediaType::TYPE_VIDEO: - return make_unique( - ctx, idx, avType, mediaFormat, seekFrameMargin); - case MediaType::TYPE_AUDIO: - return make_unique( - ctx, idx, avType, mediaFormat, seekFrameMargin); - default: - return nullptr; - } -} - -} // namespace - -FfmpegAvioContext::FfmpegAvioContext() - : workBuffersize_(VIO_BUFFER_SZ), - workBuffer_((uint8_t*)av_malloc(workBuffersize_)), - inputFile_(nullptr), - inputBuffer_(nullptr), - inputBufferSize_(0) {} - -int FfmpegAvioContext::initAVIOContext(const uint8_t* buffer, int64_t size) { - inputBuffer_ = buffer; - inputBufferSize_ = size; - avioCtx_ = avio_alloc_context( - workBuffer_, - workBuffersize_, - 0, - reinterpret_cast(this), - &FfmpegAvioContext::readMemory, - nullptr, // no write function - &FfmpegAvioContext::seekMemory); - return 0; -} - -FfmpegAvioContext::~FfmpegAvioContext() { - /* note: the internal buffer could have changed, and be != workBuffer_ */ - if (avioCtx_) { - av_freep(&avioCtx_->buffer); - av_freep(&avioCtx_); - } else { - av_freep(&workBuffer_); - } - if (inputFile_) { - fclose(inputFile_); - } -} - -int FfmpegAvioContext::read(uint8_t* buf, int buf_size) { - if (inputBuffer_) { - return readMemory(this, buf, buf_size); - } else { - return -1; - } -} - -int FfmpegAvioContext::readMemory(void* opaque, uint8_t* buf, int buf_size) { - FfmpegAvioContext* h = static_cast(opaque); - if (buf_size < 0) { - return -1; - } - - int reminder = h->inputBufferSize_ - h->offset_; - int r = buf_size < reminder ? buf_size : reminder; - if (r < 0) { - return AVERROR_EOF; - } - - memcpy(buf, h->inputBuffer_ + h->offset_, r); - h->offset_ += r; - return r; -} - -int64_t FfmpegAvioContext::seek(int64_t offset, int whence) { - if (inputBuffer_) { - return seekMemory(this, offset, whence); - } else { - return -1; - } -} - -int64_t FfmpegAvioContext::seekMemory( - void* opaque, - int64_t offset, - int whence) { - FfmpegAvioContext* h = static_cast(opaque); - switch (whence) { - case SEEK_CUR: // from current position - h->offset_ += offset; - break; - case SEEK_END: // from eof - h->offset_ = h->inputBufferSize_ + offset; - break; - case SEEK_SET: // from beginning of file - h->offset_ = offset; - break; - case AVSEEK_SIZE: - return h->inputBufferSize_; - } - return h->offset_; -} - -int FfmpegDecoder::init( - const std::string& filename, - bool isDecodeFile, - FfmpegAvioContext& ioctx, - DecoderOutput& decoderOutput) { - cleanUp(); - - int ret = 0; - if (!isDecodeFile) { - formatCtx_ = avformat_alloc_context(); - if (!formatCtx_) { - LOG(ERROR) << "avformat_alloc_context failed"; - return -1; - } - formatCtx_->pb = ioctx.get_avio(); - formatCtx_->flags |= AVFMT_FLAG_CUSTOM_IO; - - // Determining the input format: - int probeSz = AVPROBE_SIZE + AVPROBE_PADDING_SIZE; - uint8_t* probe((uint8_t*)av_malloc(probeSz)); - memset(probe, 0, probeSz); - int len = ioctx.read(probe, probeSz - AVPROBE_PADDING_SIZE); - if (len < probeSz - AVPROBE_PADDING_SIZE) { - LOG(ERROR) << "Insufficient data to determine video format"; - av_freep(&probe); - return -1; - } - // seek back to start of stream - ioctx.seek(0, SEEK_SET); - - unique_ptr probeData(new AVProbeData()); - probeData->buf = probe; - probeData->buf_size = len; - probeData->filename = ""; - // Determine the input-format: - formatCtx_->iformat = av_probe_input_format(probeData.get(), 1); - // this is to avoid the double-free error - if (formatCtx_->iformat == nullptr) { - LOG(ERROR) << "av_probe_input_format fails"; - return -1; - } - VLOG(1) << "av_probe_input_format succeeds"; - av_freep(&probe); - - ret = avformat_open_input(&formatCtx_, "", nullptr, nullptr); - } else { - ret = avformat_open_input(&formatCtx_, filename.c_str(), nullptr, nullptr); - } - - if (ret < 0) { - LOG(ERROR) << "avformat_open_input failed, error: " - << ffmpeg_util::getErrorDesc(ret); - cleanUp(); - return ret; - } - ret = avformat_find_stream_info(formatCtx_, nullptr); - if (ret < 0) { - LOG(ERROR) << "avformat_find_stream_info failed, error: " - << ffmpeg_util::getErrorDesc(ret); - cleanUp(); - return ret; - } - if (!initStreams()) { - LOG(ERROR) << "Cannot activate streams"; - cleanUp(); - return -1; - } - - for (auto& stream : streams_) { - MediaType mediaType = stream.second->getMediaType(); - decoderOutput.initMediaType(mediaType, stream.second->getMediaFormat()); - } - VLOG(1) << "FfmpegDecoder initialized"; - return 0; -} - -int FfmpegDecoder::decodeFile( - unique_ptr params, - const string& fileName, - DecoderOutput& decoderOutput) { - VLOG(1) << "decode file: " << fileName; - FfmpegAvioContext ioctx; - int ret = decodeLoop(std::move(params), fileName, true, ioctx, decoderOutput); - return ret; -} - -int FfmpegDecoder::decodeMemory( - unique_ptr params, - const uint8_t* buffer, - int64_t size, - DecoderOutput& decoderOutput) { - VLOG(1) << "decode video data in memory"; - FfmpegAvioContext ioctx; - int ret = ioctx.initAVIOContext(buffer, size); - if (ret == 0) { - ret = - decodeLoop(std::move(params), string(""), false, ioctx, decoderOutput); - } - return ret; -} - -int FfmpegDecoder::probeFile( - unique_ptr params, - const string& fileName, - DecoderOutput& decoderOutput) { - VLOG(1) << "probe file: " << fileName; - FfmpegAvioContext ioctx; - return probeVideo(std::move(params), fileName, true, ioctx, decoderOutput); -} - -int FfmpegDecoder::probeMemory( - unique_ptr params, - const uint8_t* buffer, - int64_t size, - DecoderOutput& decoderOutput) { - VLOG(1) << "probe video data in memory"; - FfmpegAvioContext ioctx; - int ret = ioctx.initAVIOContext(buffer, size); - if (ret == 0) { - ret = - probeVideo(std::move(params), string(""), false, ioctx, decoderOutput); - } - return ret; -} - -void FfmpegDecoder::cleanUp() { - if (formatCtx_) { - for (auto& stream : streams_) { - // Drain stream buffers. - DecoderOutput decoderOutput; - stream.second->flush(1, decoderOutput); - stream.second.reset(); - } - streams_.clear(); - avformat_close_input(&formatCtx_); - } -} - -FfmpegStream* FfmpegDecoder::findStreamByIndex(int streamIndex) const { - auto it = streams_.find(streamIndex); - return it != streams_.end() ? it->second.get() : nullptr; -} - -/* -Reference implementation: -https://ffmpeg.org/doxygen/3.4/demuxing_decoding_8c-example.html -*/ -int FfmpegDecoder::decodeLoop( - unique_ptr params, - const std::string& filename, - bool isDecodeFile, - FfmpegAvioContext& ioctx, - DecoderOutput& decoderOutput) { - params_ = std::move(params); - - int ret = init(filename, isDecodeFile, ioctx, decoderOutput); - if (ret < 0) { - return ret; - } - // init package - av_init_packet(&avPkt); - avPkt.data = nullptr; - avPkt.size = 0; - - int result = 0; - bool ptsInRange = true; - while (ptsInRange) { - result = av_read_frame(formatCtx_, &avPkt); - if (result == AVERROR(EAGAIN)) { - VLOG(1) << "Decoder is busy"; - ret = 0; - break; - } else if (result == AVERROR_EOF) { - VLOG(1) << "Stream decoding is completed"; - ret = 0; - break; - } else if (result < 0) { - VLOG(1) << "av_read_frame fails. Break decoder loop. Error: " - << ffmpeg_util::getErrorDesc(result); - ret = result; - break; - } - - ret = 0; - auto stream = findStreamByIndex(avPkt.stream_index); - if (stream == nullptr) { - // the packet is from a stream the caller is not interested. Ignore it - VLOG(2) << "avPkt ignored. stream index: " << avPkt.stream_index; - // Need to free the memory of AVPacket. Otherwise, memory leak happens - av_packet_unref(&avPkt); - continue; - } - - do { - result = stream->sendPacket(&avPkt); - if (result == AVERROR(EAGAIN)) { - VLOG(2) << "avcodec_send_packet returns AVERROR(EAGAIN)"; - // start to recevie available frames from internal buffer - stream->receiveAvailFrames(params_->getPtsOnly, decoderOutput); - if (isPtsExceedRange()) { - // exit the most-outer while loop - VLOG(1) << "In all streams, exceed the end pts. Exit decoding loop"; - ret = 0; - ptsInRange = false; - break; - } - } else if (result < 0) { - LOG(WARNING) << "avcodec_send_packet failed. Error: " - << ffmpeg_util::getErrorDesc(result); - ret = result; - break; - } else { - VLOG(2) << "avcodec_send_packet succeeds"; - // succeed. Read the next AVPacket and send out it - break; - } - } while (ptsInRange); - // Need to free the memory of AVPacket. Otherwise, memory leak happens - av_packet_unref(&avPkt); - } - /* flush cached frames */ - flushStreams(decoderOutput); - return ret; -} - -int FfmpegDecoder::probeVideo( - unique_ptr params, - const std::string& filename, - bool isDecodeFile, - FfmpegAvioContext& ioctx, - DecoderOutput& decoderOutput) { - params_ = std::move(params); - return init(filename, isDecodeFile, ioctx, decoderOutput); -} - -bool FfmpegDecoder::initStreams() { - for (auto it = params_->formats.begin(); it != params_->formats.end(); ++it) { - AVMediaType mediaType; - if (!ffmpeg_util::mapMediaType(it->first, &mediaType)) { - LOG(ERROR) << "Unknown media type: " << it->first; - return false; - } - int streamIdx = - av_find_best_stream(formatCtx_, mediaType, -1, -1, nullptr, 0); - - if (streamIdx >= 0) { - VLOG(2) << "find stream index: " << streamIdx; - auto stream = createFfmpegStream( - it->first, - formatCtx_, - streamIdx, - it->second, - params_->seekFrameMargin); - - CHECK(stream); - if (stream->openCodecContext() < 0) { - LOG(ERROR) << "Cannot open codec. Stream index: " << streamIdx; - return false; - } - streams_.emplace(streamIdx, move(stream)); - } else { - VLOG(1) << "Cannot open find stream of type " << it->first; - } - } - // Seek frames in each stream - int ret = 0; - for (auto& stream : streams_) { - auto startPts = stream.second->getStartPts(); - VLOG(1) << "stream: " << stream.first << " startPts: " << startPts; - if (startPts > 0 && (ret = stream.second->seekFrame(startPts)) < 0) { - LOG(WARNING) << "seekFrame in stream fails"; - return false; - } - } - VLOG(1) << "initStreams succeeds"; - return true; -} - -bool FfmpegDecoder::isPtsExceedRange() { - bool exceed = true; - for (auto& stream : streams_) { - exceed = exceed && stream.second->isFramePtsExceedRange(); - } - return exceed; -} - -void FfmpegDecoder::flushStreams(DecoderOutput& decoderOutput) { - for (auto& stream : streams_) { - stream.second->flush(params_->getPtsOnly, decoderOutput); - } -} diff --git a/torchvision/csrc/cpu/video_reader/FfmpegDecoder.h b/torchvision/csrc/cpu/video_reader/FfmpegDecoder.h deleted file mode 100644 index a0a564a4214..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegDecoder.h +++ /dev/null @@ -1,127 +0,0 @@ -#pragma once - -#include -#include - -#include "FfmpegHeaders.h" -#include "FfmpegStream.h" -#include "Interface.h" - -#define VIO_BUFFER_SZ 81920 -#define AVPROBE_SIZE 8192 - -class DecoderParameters { - public: - std::unordered_map formats; - // av_seek_frame is imprecise so seek to a timestamp earlier by a margin - // The unit of margin is second - double seekFrameMargin{1.0}; - // When getPtsOnly is set to 1, we only get pts of each frame and don not - // output frame data. It will be much faster - int64_t getPtsOnly{0}; -}; - -class FfmpegAvioContext { - public: - FfmpegAvioContext(); - - int initAVIOContext(const uint8_t* buffer, int64_t size); - - ~FfmpegAvioContext(); - - int read(uint8_t* buf, int buf_size); - - static int readMemory(void* opaque, uint8_t* buf, int buf_size); - - int64_t seek(int64_t offset, int whence); - - static int64_t seekMemory(void* opaque, int64_t offset, int whence); - - AVIOContext* get_avio() { - return avioCtx_; - } - - private: - int workBuffersize_; - uint8_t* workBuffer_; - // for file mode - FILE* inputFile_; - // for memory mode - const uint8_t* inputBuffer_; - int inputBufferSize_; - int offset_ = 0; - - AVIOContext* avioCtx_{nullptr}; -}; - -class FfmpegDecoder { - public: - FfmpegDecoder() { - av_register_all(); - } - ~FfmpegDecoder() { - cleanUp(); - } - // return 0 on success - // return negative number on failure - int decodeFile( - std::unique_ptr params, - const std::string& filename, - DecoderOutput& decoderOutput); - // return 0 on success - // return negative number on failure - int decodeMemory( - std::unique_ptr params, - const uint8_t* buffer, - int64_t size, - DecoderOutput& decoderOutput); - // return 0 on success - // return negative number on failure - int probeFile( - std::unique_ptr params, - const std::string& filename, - DecoderOutput& decoderOutput); - // return 0 on success - // return negative number on failure - int probeMemory( - std::unique_ptr params, - const uint8_t* buffer, - int64_t size, - DecoderOutput& decoderOutput); - - void cleanUp(); - - private: - FfmpegStream* findStreamByIndex(int streamIndex) const; - - int init( - const std::string& filename, - bool isDecodeFile, - FfmpegAvioContext& ioctx, - DecoderOutput& decoderOutput); - // return 0 on success - // return negative number on failure - int decodeLoop( - std::unique_ptr params, - const std::string& filename, - bool isDecodeFile, - FfmpegAvioContext& ioctx, - DecoderOutput& decoderOutput); - - int probeVideo( - std::unique_ptr params, - const std::string& filename, - bool isDecodeFile, - FfmpegAvioContext& ioctx, - DecoderOutput& decoderOutput); - - bool initStreams(); - - void flushStreams(DecoderOutput& decoderOutput); - // whether in all streams, the pts of most recent frame exceeds range - bool isPtsExceedRange(); - - std::unordered_map> streams_; - AVFormatContext* formatCtx_{nullptr}; - std::unique_ptr params_{nullptr}; -}; diff --git a/torchvision/csrc/cpu/video_reader/FfmpegHeaders.h b/torchvision/csrc/cpu/video_reader/FfmpegHeaders.h deleted file mode 100644 index ff26aa30a8d..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegHeaders.h +++ /dev/null @@ -1,13 +0,0 @@ -#pragma once - -extern "C" { -#include -#include -#include -#include -#include -#include -#include -#include -#include -} diff --git a/torchvision/csrc/cpu/video_reader/FfmpegSampler.h b/torchvision/csrc/cpu/video_reader/FfmpegSampler.h deleted file mode 100644 index 3d00be3486f..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegSampler.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once - -#include "FfmpegHeaders.h" -#include "Interface.h" - -/** - * Class sample data from AVFrame - */ -class FfmpegSampler { - public: - virtual ~FfmpegSampler() = default; - // return 0 on success and negative number on failure - virtual int init() = 0; - // sample from the given frame - virtual std::unique_ptr sample(const AVFrame* frame) = 0; -}; diff --git a/torchvision/csrc/cpu/video_reader/FfmpegStream.cpp b/torchvision/csrc/cpu/video_reader/FfmpegStream.cpp deleted file mode 100644 index b745170baf4..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegStream.cpp +++ /dev/null @@ -1,188 +0,0 @@ -#include "FfmpegStream.h" -#include "FfmpegUtil.h" - -using namespace std; - -// (TODO) Currently, disable the use of refCount -static int refCount = 0; - -FfmpegStream::FfmpegStream( - AVFormatContext* inputCtx, - int index, - enum AVMediaType avMediaType, - double seekFrameMargin) - : inputCtx_(inputCtx), - index_(index), - avMediaType_(avMediaType), - seekFrameMargin_(seekFrameMargin) {} - -FfmpegStream::~FfmpegStream() { - if (frame_) { - av_frame_free(&frame_); - } - avcodec_free_context(&codecCtx_); -} - -int FfmpegStream::openCodecContext() { - VLOG(2) << "stream start_time: " << inputCtx_->streams[index_]->start_time; - - auto typeString = av_get_media_type_string(avMediaType_); - AVStream* st = inputCtx_->streams[index_]; - auto codec_id = st->codecpar->codec_id; - VLOG(1) << "codec_id: " << codec_id; - AVCodec* codec = avcodec_find_decoder(codec_id); - if (!codec) { - LOG(ERROR) << "avcodec_find_decoder failed for codec_id: " << int(codec_id); - return AVERROR(EINVAL); - } - VLOG(1) << "Succeed to find decoder"; - - codecCtx_ = avcodec_alloc_context3(codec); - if (!codecCtx_) { - LOG(ERROR) << "avcodec_alloc_context3 fails"; - return AVERROR(ENOMEM); - } - - int ret; - /* Copy codec parameters from input stream to output codec context */ - if ((ret = avcodec_parameters_to_context(codecCtx_, st->codecpar)) < 0) { - LOG(ERROR) << "Failed to copy " << typeString - << " codec parameters to decoder context"; - return ret; - } - - AVDictionary* opts = nullptr; - av_dict_set(&opts, "refcounted_frames", refCount ? "1" : "0", 0); - - // after avcodec_open2, value of codecCtx_->time_base is NOT meaningful - // But inputCtx_->streams[index_]->time_base has meaningful values - if ((ret = avcodec_open2(codecCtx_, codec, &opts)) < 0) { - LOG(ERROR) << "avcodec_open2 failed. " << ffmpeg_util::getErrorDesc(ret); - return ret; - } - VLOG(1) << "Succeed to open codec"; - - frame_ = av_frame_alloc(); - return initFormat(); -} - -unique_ptr FfmpegStream::getFrameData(int getPtsOnly) { - if (!codecCtx_) { - LOG(ERROR) << "Codec is not initialized"; - return nullptr; - } - if (getPtsOnly) { - unique_ptr decodedFrame = make_unique(); - decodedFrame->pts_ = frame_->pts; - return decodedFrame; - } else { - unique_ptr decodedFrame = sampleFrameData(); - if (decodedFrame) { - decodedFrame->pts_ = frame_->pts; - } - return decodedFrame; - } -} - -void FfmpegStream::flush(int getPtsOnly, DecoderOutput& decoderOutput) { - VLOG(1) << "Media Type: " << getMediaType() << ", flush stream."; - // need to receive frames before entering draining mode - receiveAvailFrames(getPtsOnly, decoderOutput); - - VLOG(2) << "send nullptr packet"; - sendPacket(nullptr); - // receive remaining frames after entering draining mode - receiveAvailFrames(getPtsOnly, decoderOutput); - - avcodec_flush_buffers(codecCtx_); -} - -bool FfmpegStream::isFramePtsInRange() { - CHECK(frame_); - auto pts = frame_->pts; - auto startPts = this->getStartPts(); - auto endPts = this->getEndPts(); - VLOG(2) << "isPtsInRange. pts: " << pts << ", startPts: " << startPts - << ", endPts: " << endPts; - return (pts == AV_NOPTS_VALUE) || - (pts >= startPts && (endPts >= 0 ? pts <= endPts : true)); -} - -bool FfmpegStream::isFramePtsExceedRange() { - if (frame_) { - auto endPts = this->getEndPts(); - VLOG(2) << "isFramePtsExceedRange. last_pts_: " << last_pts_ - << ", endPts: " << endPts; - return endPts >= 0 ? last_pts_ >= endPts : false; - } else { - return true; - } -} - -// seek a frame -int FfmpegStream::seekFrame(int64_t seekPts) { - // translate margin from second to pts - int64_t margin = (int64_t)( - seekFrameMargin_ * (double)inputCtx_->streams[index_]->time_base.den / - (double)inputCtx_->streams[index_]->time_base.num); - int64_t real_seekPts = (seekPts - margin) > 0 ? (seekPts - margin) : 0; - VLOG(2) << "seek margin: " << margin; - VLOG(2) << "real seekPts: " << real_seekPts; - int ret = av_seek_frame( - inputCtx_, - index_, - (seekPts - margin) > 0 ? (seekPts - margin) : 0, - AVSEEK_FLAG_BACKWARD); - if (ret < 0) { - LOG(WARNING) << "av_seek_frame fails. Stream index: " << index_; - return ret; - } - return 0; -} - -// send/receive encoding and decoding API overview -// https://ffmpeg.org/doxygen/3.4/group__lavc__encdec.html -int FfmpegStream::sendPacket(const AVPacket* packet) { - return avcodec_send_packet(codecCtx_, packet); -} - -int FfmpegStream::receiveFrame() { - int ret = avcodec_receive_frame(codecCtx_, frame_); - if (ret >= 0) { - // succeed - frame_->pts = av_frame_get_best_effort_timestamp(frame_); - if (frame_->pts == AV_NOPTS_VALUE) { - // Trick: if we can not figure out pts, we just set it to be (last_pts + - // 1) - frame_->pts = last_pts_ + 1; - } - last_pts_ = frame_->pts; - - VLOG(2) << "avcodec_receive_frame succeed"; - } else if (ret == AVERROR(EAGAIN)) { - VLOG(2) << "avcodec_receive_frame fails and returns AVERROR(EAGAIN). "; - } else if (ret == AVERROR_EOF) { - // no more frame to read - VLOG(2) << "avcodec_receive_frame returns AVERROR_EOF"; - } else { - LOG(WARNING) << "avcodec_receive_frame failed. Error: " - << ffmpeg_util::getErrorDesc(ret); - } - return ret; -} - -void FfmpegStream::receiveAvailFrames( - int getPtsOnly, - DecoderOutput& decoderOutput) { - int result = 0; - while ((result = receiveFrame()) >= 0) { - unique_ptr decodedFrame = getFrameData(getPtsOnly); - - if (decodedFrame && - ((!getPtsOnly && decodedFrame->frameSize_ > 0) || getPtsOnly)) { - if (isFramePtsInRange()) { - decoderOutput.addMediaFrame(getMediaType(), std::move(decodedFrame)); - } - } // end-if - } // end-while -} diff --git a/torchvision/csrc/cpu/video_reader/FfmpegStream.h b/torchvision/csrc/cpu/video_reader/FfmpegStream.h deleted file mode 100644 index b66a36977ec..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegStream.h +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. -#pragma once - -#include -#include -#include -#include "FfmpegHeaders.h" -#include "Interface.h" - -/* -Class uses FFMPEG library to decode one media stream (audio or video). -*/ -class FfmpegStream { - public: - FfmpegStream( - AVFormatContext* inputCtx, - int index, - enum AVMediaType avMediaType, - double seekFrameMargin); - virtual ~FfmpegStream(); - - // returns 0 - on success or negative error - int openCodecContext(); - // returns stream index - int getIndex() const { - return index_; - } - // returns number decoded/sampled bytes - std::unique_ptr getFrameData(int getPtsOnly); - // flush the stream at the end of decoding. - // Return 0 on success and -1 when cache is drained - void flush(int getPtsOnly, DecoderOutput& decoderOutput); - // seek a frame - int seekFrame(int64_t ts); - // send an AVPacket - int sendPacket(const AVPacket* packet); - // receive AVFrame - int receiveFrame(); - // receive all available frames from the internal buffer - void receiveAvailFrames(int getPtsOnly, DecoderOutput& decoderOutput); - // return media type - virtual MediaType getMediaType() const = 0; - // return media format - virtual FormatUnion getMediaFormat() const = 0; - // return start presentation timestamp - virtual int64_t getStartPts() const = 0; - // return end presentation timestamp - virtual int64_t getEndPts() const = 0; - // is the pts of most recent frame within range? - bool isFramePtsInRange(); - // does the pts of most recent frame exceed range? - bool isFramePtsExceedRange(); - - protected: - virtual int initFormat() = 0; - // returns a decoded frame - virtual std::unique_ptr sampleFrameData() = 0; - - protected: - AVFormatContext* const inputCtx_; - const int index_; - enum AVMediaType avMediaType_; - - AVCodecContext* codecCtx_{nullptr}; - AVFrame* frame_{nullptr}; - // pts of last decoded frame - int64_t last_pts_{0}; - double seekFrameMargin_{1.0}; -}; diff --git a/torchvision/csrc/cpu/video_reader/FfmpegUtil.cpp b/torchvision/csrc/cpu/video_reader/FfmpegUtil.cpp deleted file mode 100644 index 9e804ee67c0..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegUtil.cpp +++ /dev/null @@ -1,111 +0,0 @@ -#include "FfmpegUtil.h" - -using namespace std; - -namespace ffmpeg_util { - -bool mapFfmpegType(AVMediaType media, MediaType* type) { - switch (media) { - case AVMEDIA_TYPE_VIDEO: - *type = MediaType::TYPE_VIDEO; - return true; - case AVMEDIA_TYPE_AUDIO: - *type = MediaType::TYPE_AUDIO; - return true; - default: - return false; - } -} - -bool mapMediaType(MediaType type, AVMediaType* media) { - switch (type) { - case MediaType::TYPE_VIDEO: - *media = AVMEDIA_TYPE_VIDEO; - return true; - case MediaType::TYPE_AUDIO: - *media = AVMEDIA_TYPE_AUDIO; - return true; - default: - return false; - } -} - -void setFormatDimensions( - int& destW, - int& destH, - int userW, - int userH, - int srcW, - int srcH, - int minDimension) { - // rounding rules - // int -> double -> round - // round up if fraction is >= 0.5 or round down if fraction is < 0.5 - // int result = double(value) + 0.5 - // here we rounding double to int according to the above rule - if (userW == 0 && userH == 0) { - if (minDimension > 0) { // #2 - if (srcW > srcH) { - // landscape - destH = minDimension; - destW = round(double(srcW * minDimension) / srcH); - } else { - // portrait - destW = minDimension; - destH = round(double(srcH * minDimension) / srcW); - } - } else { // #1 - destW = srcW; - destH = srcH; - } - } else if (userW != 0 && userH == 0) { // #3 - destW = userW; - destH = round(double(srcH * userW) / srcW); - } else if (userW == 0 && userH != 0) { // #4 - destW = round(double(srcW * userH) / srcH); - destH = userH; - } else { - // userW != 0 && userH != 0. #5 - destW = userW; - destH = userH; - } - // prevent zeros - destW = std::max(destW, 1); - destH = std::max(destH, 1); -} - -bool validateVideoFormat(const VideoFormat& f) { - /* - Valid parameters values for decoder - ___________________________________________________ - | W | H | minDimension | algorithm | - |_________________________________________________| - | 0 | 0 | 0 | original | - |_________________________________________________| - | 0 | 0 | >0 |scale to min dimension| - |_____|_____|____________________________________ | - | >0 | 0 | 0 | scale keeping W | - |_________________________________________________| - | 0 | >0 | 0 | scale keeping H | - |_________________________________________________| - | >0 | >0 | 0 | stretch/scale | - |_________________________________________________| - - */ - return (f.width == 0 && f.height == 0) || // #1 and #2 - (f.width != 0 && f.height != 0 && f.minDimension == 0) || // # 5 - (((f.width != 0 && f.height == 0) || // #3 and #4 - (f.width == 0 && f.height != 0)) && - f.minDimension == 0); -} - -string getErrorDesc(int errnum) { - array buffer; - if (av_strerror(errnum, buffer.data(), buffer.size()) < 0) { - return string("Unknown error code"); - } - buffer.back() = 0; - return string(buffer.data()); -} - -} // namespace ffmpeg_util diff --git a/torchvision/csrc/cpu/video_reader/FfmpegUtil.h b/torchvision/csrc/cpu/video_reader/FfmpegUtil.h deleted file mode 100644 index 9f42eb53c97..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegUtil.h +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -#include -#include -#include "FfmpegHeaders.h" -#include "Interface.h" - -namespace ffmpeg_util { - -bool mapFfmpegType(AVMediaType media, enum MediaType* type); - -bool mapMediaType(MediaType type, enum AVMediaType* media); - -void setFormatDimensions( - int& destW, - int& destH, - int userW, - int userH, - int srcW, - int srcH, - int minDimension); - -bool validateVideoFormat(const VideoFormat& f); - -std::string getErrorDesc(int errnum); - -} // namespace ffmpeg_util diff --git a/torchvision/csrc/cpu/video_reader/FfmpegVideoSampler.cpp b/torchvision/csrc/cpu/video_reader/FfmpegVideoSampler.cpp deleted file mode 100644 index d87b3104dd5..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegVideoSampler.cpp +++ /dev/null @@ -1,90 +0,0 @@ -#include "FfmpegVideoSampler.h" -#include "FfmpegUtil.h" - -using namespace std; - -FfmpegVideoSampler::FfmpegVideoSampler( - const VideoFormat& in, - const VideoFormat& out, - int swsFlags) - : inFormat_(in), outFormat_(out), swsFlags_(swsFlags) {} - -FfmpegVideoSampler::~FfmpegVideoSampler() { - if (scaleContext_) { - sws_freeContext(scaleContext_); - scaleContext_ = nullptr; - } -} - -int FfmpegVideoSampler::init() { - VLOG(1) << "Input format: width " << inFormat_.width << ", height " - << inFormat_.height << ", format " << inFormat_.format - << ", minDimension " << inFormat_.minDimension; - VLOG(1) << "Scale format: width " << outFormat_.width << ", height " - << outFormat_.height << ", format " << outFormat_.format - << ", minDimension " << outFormat_.minDimension; - - scaleContext_ = sws_getContext( - inFormat_.width, - inFormat_.height, - (AVPixelFormat)inFormat_.format, - outFormat_.width, - outFormat_.height, - static_cast(outFormat_.format), - swsFlags_, - nullptr, - nullptr, - nullptr); - if (scaleContext_) { - return 0; - } else { - return -1; - } -} - -int32_t FfmpegVideoSampler::getImageBytes() const { - return av_image_get_buffer_size( - (AVPixelFormat)outFormat_.format, outFormat_.width, outFormat_.height, 1); -} - -// https://ffmpeg.org/doxygen/3.4/scaling_video_8c-example.html#a10 -unique_ptr FfmpegVideoSampler::sample(const AVFrame* frame) { - if (!frame) { - return nullptr; // no flush for videos - } - // scaled and cropped image - auto outImageSize = getImageBytes(); - AvDataPtr frameData(static_cast(av_malloc(outImageSize))); - - uint8_t* scalePlanes[4] = {nullptr}; - int scaleLines[4] = {0}; - - int result; - if ((result = av_image_fill_arrays( - scalePlanes, - scaleLines, - frameData.get(), - static_cast(outFormat_.format), - outFormat_.width, - outFormat_.height, - 1)) < 0) { - LOG(ERROR) << "av_image_fill_arrays failed, err: " - << ffmpeg_util::getErrorDesc(result); - return nullptr; - } - - if ((result = sws_scale( - scaleContext_, - frame->data, - frame->linesize, - 0, - inFormat_.height, - scalePlanes, - scaleLines)) < 0) { - LOG(ERROR) << "sws_scale failed, err: " - << ffmpeg_util::getErrorDesc(result); - return nullptr; - } - - return make_unique(std::move(frameData), outImageSize, 0); -} diff --git a/torchvision/csrc/cpu/video_reader/FfmpegVideoSampler.h b/torchvision/csrc/cpu/video_reader/FfmpegVideoSampler.h deleted file mode 100644 index 1fd6862f537..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegVideoSampler.h +++ /dev/null @@ -1,32 +0,0 @@ -#pragma once - -#include "FfmpegSampler.h" - -/** - * Class transcode video frames from one format into another - */ - -class FfmpegVideoSampler : public FfmpegSampler { - public: - explicit FfmpegVideoSampler( - const VideoFormat& in, - const VideoFormat& out, - int swsFlags = SWS_AREA); - ~FfmpegVideoSampler() override; - - int init() override; - - int32_t getImageBytes() const; - // returns number of bytes of the sampled data - std::unique_ptr sample(const AVFrame* frame) override; - - const VideoFormat& getInFormat() const { - return inFormat_; - } - - private: - VideoFormat inFormat_; - VideoFormat outFormat_; - int swsFlags_; - SwsContext* scaleContext_{nullptr}; -}; diff --git a/torchvision/csrc/cpu/video_reader/FfmpegVideoStream.cpp b/torchvision/csrc/cpu/video_reader/FfmpegVideoStream.cpp deleted file mode 100644 index 7a429249a71..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegVideoStream.cpp +++ /dev/null @@ -1,115 +0,0 @@ -#include "FfmpegVideoStream.h" -#include "FfmpegUtil.h" - -using namespace std; - -namespace { - -bool operator==(const VideoFormat& x, const AVFrame& y) { - return x.width == y.width && x.height == y.height && - x.format == static_cast(y.format); -} - -VideoFormat toVideoFormat(const AVFrame& frame) { - VideoFormat videoFormat; - videoFormat.width = frame.width; - videoFormat.height = frame.height; - videoFormat.format = static_cast(frame.format); - - return videoFormat; -} - -} // namespace - -FfmpegVideoStream::FfmpegVideoStream( - AVFormatContext* inputCtx, - int index, - enum AVMediaType avMediaType, - MediaFormat mediaFormat, - double seekFrameMargin) - : FfmpegStream(inputCtx, index, avMediaType, seekFrameMargin), - mediaFormat_(mediaFormat) {} - -FfmpegVideoStream::~FfmpegVideoStream() {} - -void FfmpegVideoStream::checkStreamDecodeParams() { - auto timeBase = getTimeBase(); - if (timeBase.first > 0) { - CHECK_EQ(timeBase.first, inputCtx_->streams[index_]->time_base.num); - CHECK_EQ(timeBase.second, inputCtx_->streams[index_]->time_base.den); - } -} - -void FfmpegVideoStream::updateStreamDecodeParams() { - auto timeBase = getTimeBase(); - if (timeBase.first == 0) { - mediaFormat_.format.video.timeBaseNum = - inputCtx_->streams[index_]->time_base.num; - mediaFormat_.format.video.timeBaseDen = - inputCtx_->streams[index_]->time_base.den; - } - mediaFormat_.format.video.duration = inputCtx_->streams[index_]->duration; -} - -int FfmpegVideoStream::initFormat() { - // set output format - VideoFormat& format = mediaFormat_.format.video; - if (!ffmpeg_util::validateVideoFormat(format)) { - LOG(ERROR) << "Invalid video format"; - return -1; - } - - format.fps = av_q2d( - av_guess_frame_rate(inputCtx_, inputCtx_->streams[index_], nullptr)); - - // keep aspect ratio - ffmpeg_util::setFormatDimensions( - format.width, - format.height, - format.width, - format.height, - codecCtx_->width, - codecCtx_->height, - format.minDimension); - - VLOG(1) << "After adjusting, video format" - << ", width: " << format.width << ", height: " << format.height - << ", format: " << format.format - << ", minDimension: " << format.minDimension; - - if (format.format == AV_PIX_FMT_NONE) { - format.format = codecCtx_->pix_fmt; - VLOG(1) << "Set pixel format: " << format.format; - } - - checkStreamDecodeParams(); - - updateStreamDecodeParams(); - - return format.width != 0 && format.height != 0 && - format.format != AV_PIX_FMT_NONE - ? 0 - : -1; -} - -unique_ptr FfmpegVideoStream::sampleFrameData() { - VideoFormat& format = mediaFormat_.format.video; - if (!sampler_ || !(sampler_->getInFormat() == *frame_)) { - VideoFormat newInFormat = toVideoFormat(*frame_); - sampler_ = make_unique(newInFormat, format, SWS_AREA); - VLOG(1) << "Set input video sampler format" - << ", width: " << newInFormat.width - << ", height: " << newInFormat.height - << ", format: " << newInFormat.format - << " : output video sampler format" - << ", width: " << format.width << ", height: " << format.height - << ", format: " << format.format - << ", minDimension: " << format.minDimension; - int ret = sampler_->init(); - if (ret < 0) { - VLOG(1) << "Fail to initialize video sampler"; - return nullptr; - } - } - return sampler_->sample(frame_); -} diff --git a/torchvision/csrc/cpu/video_reader/FfmpegVideoStream.h b/torchvision/csrc/cpu/video_reader/FfmpegVideoStream.h deleted file mode 100644 index 9bfbc9f665b..00000000000 --- a/torchvision/csrc/cpu/video_reader/FfmpegVideoStream.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once - -#include -#include "FfmpegStream.h" -#include "FfmpegVideoSampler.h" - -/** - * Class uses FFMPEG library to decode one video stream. - */ -class FfmpegVideoStream : public FfmpegStream { - public: - explicit FfmpegVideoStream( - AVFormatContext* inputCtx, - int index, - enum AVMediaType avMediaType, - MediaFormat mediaFormat, - double seekFrameMargin); - - ~FfmpegVideoStream() override; - - // FfmpegStream overrides - MediaType getMediaType() const override { - return MediaType::TYPE_VIDEO; - } - - FormatUnion getMediaFormat() const override { - return mediaFormat_.format; - } - - int64_t getStartPts() const override { - return mediaFormat_.format.video.startPts; - } - int64_t getEndPts() const override { - return mediaFormat_.format.video.endPts; - } - // return numerator and denominator of time base - std::pair getTimeBase() const { - return std::make_pair( - mediaFormat_.format.video.timeBaseNum, - mediaFormat_.format.video.timeBaseDen); - } - - void checkStreamDecodeParams(); - - void updateStreamDecodeParams(); - - protected: - int initFormat() override; - std::unique_ptr sampleFrameData() override; - - private: - MediaFormat mediaFormat_; - std::unique_ptr sampler_{nullptr}; -}; diff --git a/torchvision/csrc/cpu/video_reader/Interface.cpp b/torchvision/csrc/cpu/video_reader/Interface.cpp deleted file mode 100644 index 0ec9f155821..00000000000 --- a/torchvision/csrc/cpu/video_reader/Interface.cpp +++ /dev/null @@ -1,22 +0,0 @@ -#include "Interface.h" - -void DecoderOutput::initMediaType(MediaType mediaType, FormatUnion format) { - MediaData mediaData(format); - media_data_.emplace(mediaType, std::move(mediaData)); -} - -void DecoderOutput::addMediaFrame( - MediaType mediaType, - std::unique_ptr frame) { - if (media_data_.find(mediaType) != media_data_.end()) { - VLOG(1) << "media type: " << mediaType - << " add frame with pts: " << frame->pts_; - media_data_[mediaType].frames_.push_back(std::move(frame)); - } else { - VLOG(1) << "media type: " << mediaType << " not found. Skip the frame."; - } -} - -void DecoderOutput::clear() { - media_data_.clear(); -} diff --git a/torchvision/csrc/cpu/video_reader/Interface.h b/torchvision/csrc/cpu/video_reader/Interface.h deleted file mode 100644 index e137008ce7b..00000000000 --- a/torchvision/csrc/cpu/video_reader/Interface.h +++ /dev/null @@ -1,127 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -extern "C" { - -#include -#include -void av_free(void* ptr); -} - -struct avDeleter { - void operator()(uint8_t* p) const { - av_free(p); - } -}; - -const AVPixelFormat defaultVideoPixelFormat = AV_PIX_FMT_RGB24; -const AVSampleFormat defaultAudioSampleFormat = AV_SAMPLE_FMT_FLT; - -using AvDataPtr = std::unique_ptr; - -enum MediaType : uint32_t { - TYPE_VIDEO = 1, - TYPE_AUDIO = 2, -}; - -struct EnumClassHash { - template - uint32_t operator()(T t) const { - return static_cast(t); - } -}; - -struct VideoFormat { - // fields are initialized for the auto detection - // caller can specify some/all of field values if specific output is desirable - - int width{0}; // width in pixels - int height{0}; // height in pixels - int minDimension{0}; // choose min dimension and rescale accordingly - // Output image pixel format. data type AVPixelFormat - AVPixelFormat format{defaultVideoPixelFormat}; // type AVPixelFormat - int64_t startPts{0}, endPts{0}; // Start and end presentation timestamp - int timeBaseNum{0}; - int timeBaseDen{1}; // numerator and denominator of time base - float fps{0.0}; - int64_t duration{0}; // duration of the stream, in stream time base -}; - -struct AudioFormat { - // fields are initialized for the auto detection - // caller can specify some/all of field values if specific output is desirable - - int samples{0}; // number samples per second (frequency) - int channels{0}; // number of channels - AVSampleFormat format{defaultAudioSampleFormat}; // type AVSampleFormat - int64_t startPts{0}, endPts{0}; // Start and end presentation timestamp - int timeBaseNum{0}; - int timeBaseDen{1}; // numerator and denominator of time base - int64_t duration{0}; // duration of the stream, in stream time base -}; - -union FormatUnion { - FormatUnion() {} - VideoFormat video; - AudioFormat audio; -}; - -struct MediaFormat { - MediaFormat() {} - - MediaFormat(const MediaFormat& mediaFormat) : type(mediaFormat.type) { - if (type == MediaType::TYPE_VIDEO) { - format.video = mediaFormat.format.video; - } else if (type == MediaType::TYPE_AUDIO) { - format.audio = mediaFormat.format.audio; - } - } - - MediaFormat(MediaType mediaType) : type(mediaType) { - if (mediaType == MediaType::TYPE_VIDEO) { - format.video = VideoFormat(); - } else if (mediaType == MediaType::TYPE_AUDIO) { - format.audio = AudioFormat(); - } - } - // media type - MediaType type; - // format data - FormatUnion format; -}; - -class DecodedFrame { - public: - explicit DecodedFrame() : frame_(nullptr), frameSize_(0), pts_(0) {} - explicit DecodedFrame(AvDataPtr frame, int frameSize, int64_t pts) - : frame_(std::move(frame)), frameSize_(frameSize), pts_(pts) {} - AvDataPtr frame_{nullptr}; - int frameSize_{0}; - int64_t pts_{0}; -}; - -struct MediaData { - MediaData() {} - MediaData(FormatUnion format) : format_(format) {} - FormatUnion format_; - std::vector> frames_; -}; - -class DecoderOutput { - public: - explicit DecoderOutput() {} - - ~DecoderOutput() {} - - void initMediaType(MediaType mediaType, FormatUnion format); - - void addMediaFrame(MediaType mediaType, std::unique_ptr frame); - - void clear(); - - std::unordered_map media_data_; -}; diff --git a/torchvision/csrc/cpu/video_reader/VideoReader.cpp b/torchvision/csrc/cpu/video_reader/VideoReader.cpp deleted file mode 100644 index dfe7f46bf39..00000000000 --- a/torchvision/csrc/cpu/video_reader/VideoReader.cpp +++ /dev/null @@ -1,500 +0,0 @@ -#include "VideoReader.h" -#include -#include -#include -#include -#include "FfmpegDecoder.h" -#include "FfmpegHeaders.h" -#include "util.h" - -using namespace std; - -// If we are in a Windows environment, we need to define -// initialization functions for the _custom_ops extension -#ifdef _WIN32 -#if PY_MAJOR_VERSION < 3 -PyMODINIT_FUNC init_video_reader(void) { - // No need to do anything. - return NULL; -} -#else -PyMODINIT_FUNC PyInit_video_reader(void) { - // No need to do anything. - return NULL; -} -#endif -#endif - -namespace video_reader { - -class UnknownPixelFormatException : public exception { - const char* what() const throw() override { - return "Unknown pixel format"; - } -}; - -int getChannels(AVPixelFormat format) { - int numChannels = 0; - switch (format) { - case AV_PIX_FMT_BGR24: - case AV_PIX_FMT_RGB24: - numChannels = 3; - break; - default: - LOG(ERROR) << "Unknown format: " << format; - throw UnknownPixelFormatException(); - } - return numChannels; -} - -void fillVideoTensor( - std::vector>& frames, - torch::Tensor& videoFrame, - torch::Tensor& videoFramePts) { - int frameSize = 0; - if (videoFrame.numel() > 0) { - frameSize = videoFrame.numel() / frames.size(); - } - - int frameCount = 0; - - uint8_t* videoFrameData = - videoFrame.numel() > 0 ? videoFrame.data_ptr() : nullptr; - int64_t* videoFramePtsData = videoFramePts.data_ptr(); - - for (size_t i = 0; i < frames.size(); ++i) { - const auto& frame = frames[i]; - if (videoFrameData) { - memcpy( - videoFrameData + (size_t)(frameCount++) * (size_t)frameSize, - frame->frame_.get(), - frameSize * sizeof(uint8_t)); - } - videoFramePtsData[i] = frame->pts_; - } -} - -void getVideoMeta( - DecoderOutput& decoderOutput, - int& numFrames, - int& height, - int& width, - int& numChannels) { - auto& videoFrames = decoderOutput.media_data_[TYPE_VIDEO].frames_; - numFrames = videoFrames.size(); - - FormatUnion& videoFormat = decoderOutput.media_data_[TYPE_VIDEO].format_; - height = videoFormat.video.height; - width = videoFormat.video.width; - numChannels = getChannels(videoFormat.video.format); -} - -void fillAudioTensor( - std::vector>& frames, - torch::Tensor& audioFrame, - torch::Tensor& audioFramePts) { - if (frames.size() == 0) { - return; - } - - float* audioFrameData = - audioFrame.numel() > 0 ? audioFrame.data_ptr() : nullptr; - CHECK_EQ(audioFramePts.size(0), frames.size()); - int64_t* audioFramePtsData = audioFramePts.data_ptr(); - - int bytesPerSample = av_get_bytes_per_sample(defaultAudioSampleFormat); - - int64_t frameDataOffset = 0; - for (size_t i = 0; i < frames.size(); ++i) { - audioFramePtsData[i] = frames[i]->pts_; - if (audioFrameData) { - memcpy( - audioFrameData + frameDataOffset, - frames[i]->frame_.get(), - frames[i]->frameSize_); - frameDataOffset += (frames[i]->frameSize_ / bytesPerSample); - } - } -} - -void getAudioMeta( - DecoderOutput& decoderOutput, - int64_t& numSamples, - int64_t& channels, - int64_t& numFrames) { - FormatUnion& audioFormat = decoderOutput.media_data_[TYPE_AUDIO].format_; - - channels = audioFormat.audio.channels; - CHECK_EQ(audioFormat.audio.format, AV_SAMPLE_FMT_FLT); - int bytesPerSample = av_get_bytes_per_sample( - static_cast(audioFormat.audio.format)); - - // auto& audioFrames = decoderOutput.media_frames_[TYPE_AUDIO]; - auto& audioFrames = decoderOutput.media_data_[TYPE_AUDIO].frames_; - numFrames = audioFrames.size(); - int64_t frameSizeTotal = 0; - for (auto const& decodedFrame : audioFrames) { - frameSizeTotal += static_cast(decodedFrame->frameSize_); - } - VLOG(2) << "numFrames: " << numFrames; - VLOG(2) << "frameSizeTotal: " << frameSizeTotal; - VLOG(2) << "channels: " << channels; - VLOG(2) << "bytesPerSample: " << bytesPerSample; - CHECK_EQ(frameSizeTotal % (channels * bytesPerSample), 0); - numSamples = frameSizeTotal / (channels * bytesPerSample); -} - -torch::List readVideo( - bool isReadFile, - const torch::Tensor& input_video, - std::string videoPath, - double seekFrameMargin, - int64_t getPtsOnly, - int64_t readVideoStream, - int64_t width, - int64_t height, - int64_t minDimension, - int64_t videoStartPts, - int64_t videoEndPts, - int64_t videoTimeBaseNum, - int64_t videoTimeBaseDen, - int64_t readAudioStream, - int64_t audioSamples, - int64_t audioChannels, - int64_t audioStartPts, - int64_t audioEndPts, - int64_t audioTimeBaseNum, - int64_t audioTimeBaseDen) { - unique_ptr params = util::getDecoderParams( - seekFrameMargin, - getPtsOnly, - readVideoStream, - width, - height, - minDimension, - videoStartPts, - videoEndPts, - videoTimeBaseNum, - videoTimeBaseDen, - readAudioStream, - audioSamples, - audioChannels, - audioStartPts, - audioEndPts, - audioTimeBaseNum, - audioTimeBaseDen); - - FfmpegDecoder decoder; - DecoderOutput decoderOutput; - - if (isReadFile) { - decoder.decodeFile(std::move(params), videoPath, decoderOutput); - } else { - decoder.decodeMemory( - std::move(params), - input_video.data_ptr(), - input_video.size(0), - decoderOutput); - } - - // video section - torch::Tensor videoFrame = torch::zeros({0}, torch::kByte); - torch::Tensor videoFramePts = torch::zeros({0}, torch::kLong); - torch::Tensor videoTimeBase = torch::zeros({0}, torch::kInt); - torch::Tensor videoFps = torch::zeros({0}, torch::kFloat); - torch::Tensor videoDuration = torch::zeros({0}, torch::kLong); - - if (readVideoStream == 1) { - auto it = decoderOutput.media_data_.find(TYPE_VIDEO); - if (it != decoderOutput.media_data_.end()) { - int numVideoFrames, outHeight, outWidth, numChannels; - getVideoMeta( - decoderOutput, numVideoFrames, outHeight, outWidth, numChannels); - - if (getPtsOnly == 0) { - videoFrame = torch::zeros( - {numVideoFrames, outHeight, outWidth, numChannels}, torch::kByte); - } - - videoFramePts = torch::zeros({numVideoFrames}, torch::kLong); - - fillVideoTensor( - decoderOutput.media_data_[TYPE_VIDEO].frames_, - videoFrame, - videoFramePts); - - videoTimeBase = torch::zeros({2}, torch::kInt); - int* videoTimeBaseData = videoTimeBase.data_ptr(); - videoTimeBaseData[0] = it->second.format_.video.timeBaseNum; - videoTimeBaseData[1] = it->second.format_.video.timeBaseDen; - - videoFps = torch::zeros({1}, torch::kFloat); - float* videoFpsData = videoFps.data_ptr(); - videoFpsData[0] = it->second.format_.video.fps; - - videoDuration = torch::zeros({1}, torch::kLong); - int64_t* videoDurationData = videoDuration.data_ptr(); - videoDurationData[0] = it->second.format_.video.duration; - } else { - VLOG(1) << "Miss video stream"; - } - } - - // audio section - torch::Tensor audioFrame = torch::zeros({0}, torch::kFloat); - torch::Tensor audioFramePts = torch::zeros({0}, torch::kLong); - torch::Tensor audioTimeBase = torch::zeros({0}, torch::kInt); - torch::Tensor audioSampleRate = torch::zeros({0}, torch::kInt); - torch::Tensor audioDuration = torch::zeros({0}, torch::kLong); - if (readAudioStream == 1) { - auto it = decoderOutput.media_data_.find(TYPE_AUDIO); - if (it != decoderOutput.media_data_.end()) { - VLOG(1) << "Find audio stream"; - int64_t numAudioSamples = 0, outAudioChannels = 0, numAudioFrames = 0; - getAudioMeta( - decoderOutput, numAudioSamples, outAudioChannels, numAudioFrames); - VLOG(2) << "numAudioSamples: " << numAudioSamples; - VLOG(2) << "outAudioChannels: " << outAudioChannels; - VLOG(2) << "numAudioFrames: " << numAudioFrames; - - if (getPtsOnly == 0) { - audioFrame = - torch::zeros({numAudioSamples, outAudioChannels}, torch::kFloat); - } - audioFramePts = torch::zeros({numAudioFrames}, torch::kLong); - fillAudioTensor( - decoderOutput.media_data_[TYPE_AUDIO].frames_, - audioFrame, - audioFramePts); - - audioTimeBase = torch::zeros({2}, torch::kInt); - int* audioTimeBaseData = audioTimeBase.data_ptr(); - audioTimeBaseData[0] = it->second.format_.audio.timeBaseNum; - audioTimeBaseData[1] = it->second.format_.audio.timeBaseDen; - - audioSampleRate = torch::zeros({1}, torch::kInt); - int* audioSampleRateData = audioSampleRate.data_ptr(); - audioSampleRateData[0] = it->second.format_.audio.samples; - - audioDuration = torch::zeros({1}, torch::kLong); - int64_t* audioDurationData = audioDuration.data_ptr(); - audioDurationData[0] = it->second.format_.audio.duration; - } else { - VLOG(1) << "Miss audio stream"; - } - } - - torch::List result; - result.push_back(std::move(videoFrame)); - result.push_back(std::move(videoFramePts)); - result.push_back(std::move(videoTimeBase)); - result.push_back(std::move(videoFps)); - result.push_back(std::move(videoDuration)); - result.push_back(std::move(audioFrame)); - result.push_back(std::move(audioFramePts)); - result.push_back(std::move(audioTimeBase)); - result.push_back(std::move(audioSampleRate)); - result.push_back(std::move(audioDuration)); - - return result; -} - -torch::List readVideoFromMemory( - torch::Tensor input_video, - double seekFrameMargin, - int64_t getPtsOnly, - int64_t readVideoStream, - int64_t width, - int64_t height, - int64_t minDimension, - int64_t videoStartPts, - int64_t videoEndPts, - int64_t videoTimeBaseNum, - int64_t videoTimeBaseDen, - int64_t readAudioStream, - int64_t audioSamples, - int64_t audioChannels, - int64_t audioStartPts, - int64_t audioEndPts, - int64_t audioTimeBaseNum, - int64_t audioTimeBaseDen) { - return readVideo( - false, - input_video, - "", // videoPath - seekFrameMargin, - getPtsOnly, - readVideoStream, - width, - height, - minDimension, - videoStartPts, - videoEndPts, - videoTimeBaseNum, - videoTimeBaseDen, - readAudioStream, - audioSamples, - audioChannels, - audioStartPts, - audioEndPts, - audioTimeBaseNum, - audioTimeBaseDen); -} - -torch::List readVideoFromFile( - std::string videoPath, - double seekFrameMargin, - int64_t getPtsOnly, - int64_t readVideoStream, - int64_t width, - int64_t height, - int64_t minDimension, - int64_t videoStartPts, - int64_t videoEndPts, - int64_t videoTimeBaseNum, - int64_t videoTimeBaseDen, - int64_t readAudioStream, - int64_t audioSamples, - int64_t audioChannels, - int64_t audioStartPts, - int64_t audioEndPts, - int64_t audioTimeBaseNum, - int64_t audioTimeBaseDen) { - torch::Tensor dummy_input_video = torch::ones({0}); - return readVideo( - true, - dummy_input_video, - videoPath, - seekFrameMargin, - getPtsOnly, - readVideoStream, - width, - height, - minDimension, - videoStartPts, - videoEndPts, - videoTimeBaseNum, - videoTimeBaseDen, - readAudioStream, - audioSamples, - audioChannels, - audioStartPts, - audioEndPts, - audioTimeBaseNum, - audioTimeBaseDen); -} - -torch::List probeVideo( - bool isReadFile, - const torch::Tensor& input_video, - std::string videoPath) { - unique_ptr params = util::getDecoderParams( - 0, // seekFrameMargin - 0, // getPtsOnly - 1, // readVideoStream - 0, // width - 0, // height - 0, // minDimension - 0, // videoStartPts - 0, // videoEndPts - 0, // videoTimeBaseNum - 1, // videoTimeBaseDen - 1, // readAudioStream - 0, // audioSamples - 0, // audioChannels - 0, // audioStartPts - 0, // audioEndPts - 0, // audioTimeBaseNum - 1 // audioTimeBaseDen - ); - - FfmpegDecoder decoder; - DecoderOutput decoderOutput; - if (isReadFile) { - decoder.probeFile(std::move(params), videoPath, decoderOutput); - } else { - decoder.probeMemory( - std::move(params), - input_video.data_ptr(), - input_video.size(0), - decoderOutput); - } - // video section - torch::Tensor videoTimeBase = torch::zeros({0}, torch::kInt); - torch::Tensor videoFps = torch::zeros({0}, torch::kFloat); - torch::Tensor videoDuration = torch::zeros({0}, torch::kLong); - - auto it = decoderOutput.media_data_.find(TYPE_VIDEO); - if (it != decoderOutput.media_data_.end()) { - VLOG(1) << "Find video stream"; - videoTimeBase = torch::zeros({2}, torch::kInt); - int* videoTimeBaseData = videoTimeBase.data_ptr(); - videoTimeBaseData[0] = it->second.format_.video.timeBaseNum; - videoTimeBaseData[1] = it->second.format_.video.timeBaseDen; - - videoFps = torch::zeros({1}, torch::kFloat); - float* videoFpsData = videoFps.data_ptr(); - videoFpsData[0] = it->second.format_.video.fps; - - videoDuration = torch::zeros({1}, torch::kLong); - int64_t* videoDurationData = videoDuration.data_ptr(); - videoDurationData[0] = it->second.format_.video.duration; - } else { - VLOG(1) << "Miss video stream"; - } - - // audio section - torch::Tensor audioTimeBase = torch::zeros({0}, torch::kInt); - torch::Tensor audioSampleRate = torch::zeros({0}, torch::kInt); - torch::Tensor audioDuration = torch::zeros({0}, torch::kLong); - - it = decoderOutput.media_data_.find(TYPE_AUDIO); - if (it != decoderOutput.media_data_.end()) { - VLOG(1) << "Find audio stream"; - audioTimeBase = torch::zeros({2}, torch::kInt); - int* audioTimeBaseData = audioTimeBase.data_ptr(); - audioTimeBaseData[0] = it->second.format_.audio.timeBaseNum; - audioTimeBaseData[1] = it->second.format_.audio.timeBaseDen; - - audioSampleRate = torch::zeros({1}, torch::kInt); - int* audioSampleRateData = audioSampleRate.data_ptr(); - audioSampleRateData[0] = it->second.format_.audio.samples; - - audioDuration = torch::zeros({1}, torch::kLong); - int64_t* audioDurationData = audioDuration.data_ptr(); - audioDurationData[0] = it->second.format_.audio.duration; - } else { - VLOG(1) << "Miss audio stream"; - } - - torch::List result; - result.push_back(std::move(videoTimeBase)); - result.push_back(std::move(videoFps)); - result.push_back(std::move(videoDuration)); - result.push_back(std::move(audioTimeBase)); - result.push_back(std::move(audioSampleRate)); - result.push_back(std::move(audioDuration)); - - return result; -} - -torch::List probeVideoFromMemory(torch::Tensor input_video) { - return probeVideo(false, input_video, ""); -} - -torch::List probeVideoFromFile(std::string videoPath) { - torch::Tensor dummy_input_video = torch::ones({0}); - return probeVideo(true, dummy_input_video, videoPath); -} - -} // namespace video_reader - -static auto registry = torch::RegisterOperators() - .op("video_reader::read_video_from_memory", - &video_reader::readVideoFromMemory) - .op("video_reader::read_video_from_file", - &video_reader::readVideoFromFile) - .op("video_reader::probe_video_from_memory", - &video_reader::probeVideoFromMemory) - .op("video_reader::probe_video_from_file", - &video_reader::probeVideoFromFile); diff --git a/torchvision/csrc/cpu/video_reader/VideoReader.h b/torchvision/csrc/cpu/video_reader/VideoReader.h deleted file mode 100644 index efc2e4709a6..00000000000 --- a/torchvision/csrc/cpu/video_reader/VideoReader.h +++ /dev/null @@ -1,99 +0,0 @@ -#pragma once - -#include - -// Interface for Python - -/* - return: - videoFrame: tensor (N, H, W, C) kByte - videoFramePts: tensor (N) kLong - videoTimeBase: tensor (2) kInt - videoFps: tensor (1) kFloat - audioFrame: tensor (N, C) kFloat - audioFramePts: tensor (N) kLong - audioTimeBase: tensor (2) kInt - audioSampleRate: tensor (1) kInt -*/ -torch::List readVideoFromMemory( - // 1D tensor of data type uint8, storing the comparessed video data - torch::Tensor input_video, - // seeking frame in the video/audio stream is imprecise so seek to a - // timestamp earlier by a margin The unit of margin is second - double seekFrameMargin, - // If only pts is needed and video/audio frames are not needed, set it - // to 1 - int64_t getPtsOnly, - // bool variable. Set it to 1 if video stream should be read. Otherwise, set - // it to 0 - int64_t readVideoStream, - /* - Valid parameters values for rescaling video frames - ___________________________________________________ - | width | height | min_dimension | algorithm | - |_________________________________________________| - | 0 | 0 | 0 | original | - |_________________________________________________| - | 0 | 0 | >0 |scale to min dimension| - |_____|_____|____________________________________ | - | >0 | 0 | 0 | scale keeping W | - |_________________________________________________| - | 0 | >0 | 0 | scale keeping H | - |_________________________________________________| - | >0 | >0 | 0 | stretch/scale | - |_________________________________________________| - */ - int64_t width, - int64_t height, - int64_t minDimension, - // video frames with pts in [videoStartPts, videoEndPts] will be decoded - // For decoding all video frames, use [0, -1] - int64_t videoStartPts, - int64_t videoEndPts, - // numerator and denominator of time base of video stream. - // For decoding all video frames, supply dummy 0 (numerator) and 1 - // (denominator). For decoding localized video frames, need to supply - // them which will be checked during decoding - int64_t videoTimeBaseNum, - int64_t videoTimeBaseDen, - // bool variable. Set it to 1 if audio stream should be read. Otherwise, set - // it to 0 - int64_t readAudioStream, - // audio stream sampling rate. - // If not resampling audio waveform, supply 0 - // Otherwise, supply a positive integer. - int64_t audioSamples, - // audio stream channels - // Supply 0 to use the same number of channels as in the original audio - // stream - int64_t audioChannels, - // audio frames with pts in [audioStartPts, audioEndPts] will be decoded - // For decoding all audio frames, use [0, -1] - int64_t audioStartPts, - int64_t audioEndPts, - // numerator and denominator of time base of audio stream. - // For decoding all audio frames, supply dummy 0 (numerator) and 1 - // (denominator). For decoding localized audio frames, need to supply - // them which will be checked during decoding - int64_t audioTimeBaseNum, - int64_t audioTimeBaseDen); - -torch::List readVideoFromFile( - std::string videoPath, - double seekFrameMargin, - int64_t getPtsOnly, - int64_t readVideoStream, - int64_t width, - int64_t height, - int64_t minDimension, - int64_t videoStartPts, - int64_t videoEndPts, - int64_t videoTimeBaseNum, - int64_t videoTimeBaseDen, - int64_t readAudioStream, - int64_t audioSamples, - int64_t audioChannels, - int64_t audioStartPts, - int64_t audioEndPts, - int64_t audioTimeBaseNum, - int64_t audioTimeBaseDen); diff --git a/torchvision/csrc/cpu/video_reader/util.cpp b/torchvision/csrc/cpu/video_reader/util.cpp deleted file mode 100644 index ae3c3df0f0a..00000000000 --- a/torchvision/csrc/cpu/video_reader/util.cpp +++ /dev/null @@ -1,60 +0,0 @@ -#include "util.h" - -using namespace std; - -namespace util { - -unique_ptr getDecoderParams( - double seekFrameMargin, - int64_t getPtsOnly, - int64_t readVideoStream, - int videoWidth, - int videoHeight, - int videoMinDimension, - int64_t videoStartPts, - int64_t videoEndPts, - int videoTimeBaseNum, - int videoTimeBaseDen, - int64_t readAudioStream, - int audioSamples, - int audioChannels, - int64_t audioStartPts, - int64_t audioEndPts, - int audioTimeBaseNum, - int audioTimeBaseDen) { - unique_ptr params = make_unique(); - - if (readVideoStream == 1) { - params->formats.emplace( - MediaType::TYPE_VIDEO, MediaFormat(MediaType::TYPE_VIDEO)); - MediaFormat& videoFormat = params->formats[MediaType::TYPE_VIDEO]; - - videoFormat.format.video.width = videoWidth; - videoFormat.format.video.height = videoHeight; - videoFormat.format.video.minDimension = videoMinDimension; - videoFormat.format.video.startPts = videoStartPts; - videoFormat.format.video.endPts = videoEndPts; - videoFormat.format.video.timeBaseNum = videoTimeBaseNum; - videoFormat.format.video.timeBaseDen = videoTimeBaseDen; - } - - if (readAudioStream == 1) { - params->formats.emplace( - MediaType::TYPE_AUDIO, MediaFormat(MediaType::TYPE_AUDIO)); - MediaFormat& audioFormat = params->formats[MediaType::TYPE_AUDIO]; - - audioFormat.format.audio.samples = audioSamples; - audioFormat.format.audio.channels = audioChannels; - audioFormat.format.audio.startPts = audioStartPts; - audioFormat.format.audio.endPts = audioEndPts; - audioFormat.format.audio.timeBaseNum = audioTimeBaseNum; - audioFormat.format.audio.timeBaseDen = audioTimeBaseDen; - } - - params->seekFrameMargin = seekFrameMargin; - params->getPtsOnly = getPtsOnly; - - return params; -} - -} // namespace util diff --git a/torchvision/csrc/cpu/video_reader/util.h b/torchvision/csrc/cpu/video_reader/util.h deleted file mode 100644 index 6b5fd55388b..00000000000 --- a/torchvision/csrc/cpu/video_reader/util.h +++ /dev/null @@ -1,26 +0,0 @@ -#pragma once -#include -#include "FfmpegDecoder.h" - -namespace util { - -std::unique_ptr getDecoderParams( - double seekFrameMargin, - int64_t getPtsOnly, - int64_t readVideoStream, - int videoWidth, - int videoHeight, - int videoMinDimension, - int64_t videoStartPts, - int64_t videoEndPts, - int videoTimeBaseNum, - int videoTimeBaseDen, - int64_t readAudioStream, - int audioSamples, - int audioChannels, - int64_t audioStartPts, - int64_t audioEndPts, - int audioTimeBaseNum, - int audioTimeBaseDen); - -} // namespace util diff --git a/torchvision/csrc/cpu/vision_cpu.h b/torchvision/csrc/cpu/vision_cpu.h deleted file mode 100644 index d84b172ba49..00000000000 --- a/torchvision/csrc/cpu/vision_cpu.h +++ /dev/null @@ -1,86 +0,0 @@ -#pragma once -#include - -std::tuple ROIPool_forward_cpu( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width); - -at::Tensor ROIPool_backward_cpu( - const at::Tensor& grad, - const at::Tensor& rois, - const at::Tensor& argmax, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width); - -at::Tensor ROIAlign_forward_cpu( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio); - -at::Tensor ROIAlign_backward_cpu( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio); - -std::tuple PSROIPool_forward_cpu( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width); - -at::Tensor PSROIPool_backward_cpu( - const at::Tensor& grad, - const at::Tensor& rois, - const at::Tensor& mapping_channel, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width); - -std::tuple PSROIAlign_forward_cpu( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio); - -at::Tensor PSROIAlign_backward_cpu( - const at::Tensor& grad, - const at::Tensor& rois, - const at::Tensor& mapping_channel, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - const int batch_size, - const int channels, - const int height, - const int width); - -at::Tensor nms_cpu( - const at::Tensor& dets, - const at::Tensor& scores, - const float iou_threshold); diff --git a/torchvision/csrc/cuda/cuda_helpers.h b/torchvision/csrc/cuda/cuda_helpers.h deleted file mode 100644 index af32f60e815..00000000000 --- a/torchvision/csrc/cuda/cuda_helpers.h +++ /dev/null @@ -1,5 +0,0 @@ -#pragma once - -#define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = (blockIdx.x * blockDim.x) + threadIdx.x; i < (n); \ - i += (blockDim.x * gridDim.x)) diff --git a/torchvision/csrc/cuda/vision_cuda.h b/torchvision/csrc/cuda/vision_cuda.h deleted file mode 100644 index b35c4c909c1..00000000000 --- a/torchvision/csrc/cuda/vision_cuda.h +++ /dev/null @@ -1,87 +0,0 @@ -#pragma once -#include -#include - -at::Tensor ROIAlign_forward_cuda( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio); - -at::Tensor ROIAlign_backward_cuda( - const at::Tensor& grad, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio); - -std::tuple ROIPool_forward_cuda( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width); - -at::Tensor ROIPool_backward_cuda( - const at::Tensor& grad, - const at::Tensor& rois, - const at::Tensor& argmax, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width); - -std::tuple PSROIPool_forward_cuda( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width); - -at::Tensor PSROIPool_backward_cuda( - const at::Tensor& grad, - const at::Tensor& rois, - const at::Tensor& mapping_channel, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width); - -std::tuple PSROIAlign_forward_cuda( - const at::Tensor& input, - const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio); - -at::Tensor PSROIAlign_backward_cuda( - const at::Tensor& grad, - const at::Tensor& rois, - const at::Tensor& mapping_channel, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, - const int batch_size, - const int channels, - const int height, - const int width); - -at::Tensor nms_cuda( - const at::Tensor& dets, - const at::Tensor& scores, - const float iou_threshold); diff --git a/torchvision/csrc/io/decoder/audio_sampler.cpp b/torchvision/csrc/io/decoder/audio_sampler.cpp new file mode 100644 index 00000000000..421e503b2ce --- /dev/null +++ b/torchvision/csrc/io/decoder/audio_sampler.cpp @@ -0,0 +1,233 @@ +#include "audio_sampler.h" +#include +#include "util.h" + +#define AVRESAMPLE_MAX_CHANNELS 32 + +// www.ffmpeg.org/doxygen/1.1/doc_2examples_2resampling_audio_8c-example.html#a24 +namespace ffmpeg { + +namespace { +int preparePlanes( + const AudioFormat& fmt, + const uint8_t* buffer, + int numSamples, + uint8_t** planes) { + int result; + if ((result = av_samples_fill_arrays( + planes, + nullptr, // linesize is not needed + buffer, + fmt.channels, + numSamples, + (AVSampleFormat)fmt.format, + 1)) < 0) { + LOG(ERROR) << "av_samples_fill_arrays failed, err: " + << Util::generateErrorDesc(result) + << ", numSamples: " << numSamples << ", fmt: " << fmt.format; + } + return result; +} +} // namespace + +AudioSampler::AudioSampler(void* logCtx) : logCtx_(logCtx) {} + +AudioSampler::~AudioSampler() { + cleanUp(); +} + +void AudioSampler::shutdown() { + cleanUp(); +} + +bool AudioSampler::init(const SamplerParameters& params) { + cleanUp(); + + if (params.type != MediaType::TYPE_AUDIO) { + LOG(ERROR) << "Invalid media type, expected MediaType::TYPE_AUDIO"; + return false; + } + + swrContext_ = swr_alloc_set_opts( + nullptr, + av_get_default_channel_layout(params.out.audio.channels), + (AVSampleFormat)params.out.audio.format, + params.out.audio.samples, + av_get_default_channel_layout(params.in.audio.channels), + (AVSampleFormat)params.in.audio.format, + params.in.audio.samples, + 0, + logCtx_); + if (swrContext_ == nullptr) { + LOG(ERROR) << "Cannot allocate SwrContext"; + return false; + } + + int result; + if ((result = swr_init(swrContext_)) < 0) { + LOG(ERROR) << "swr_init faield, err: " << Util::generateErrorDesc(result) + << ", in -> format: " << params.in.audio.format + << ", channels: " << params.in.audio.channels + << ", samples: " << params.in.audio.samples + << ", out -> format: " << params.out.audio.format + << ", channels: " << params.out.audio.channels + << ", samples: " << params.out.audio.samples; + return false; + } + + // set formats + params_ = params; + return true; +} + +int AudioSampler::numOutputSamples(int inSamples) const { + return swr_get_out_samples(swrContext_, inSamples); +} + +int AudioSampler::sample( + const uint8_t* inPlanes[], + int inNumSamples, + ByteStorage* out, + int outNumSamples) { + int result; + int outBufferBytes = av_samples_get_buffer_size( + nullptr, + params_.out.audio.channels, + outNumSamples, + (AVSampleFormat)params_.out.audio.format, + 1); + + if (out) { + out->ensure(outBufferBytes); + + uint8_t* outPlanes[AVRESAMPLE_MAX_CHANNELS] = {nullptr}; + + if ((result = preparePlanes( + params_.out.audio, + out->writableTail(), + outNumSamples, + outPlanes)) < 0) { + return result; + } + + if ((result = swr_convert( + swrContext_, + &outPlanes[0], + outNumSamples, + inPlanes, + inNumSamples)) < 0) { + LOG(ERROR) << "swr_convert faield, err: " + << Util::generateErrorDesc(result); + return result; + } + + CHECK_LE(result, outNumSamples); + + if (result) { + if ((result = av_samples_get_buffer_size( + nullptr, + params_.out.audio.channels, + result, + (AVSampleFormat)params_.out.audio.format, + 1)) >= 0) { + out->append(result); + } else { + LOG(ERROR) << "av_samples_get_buffer_size faield, err: " + << Util::generateErrorDesc(result); + } + } + } else { + // allocate a temporary buffer + auto* tmpBuffer = static_cast(av_malloc(outBufferBytes)); + if (!tmpBuffer) { + LOG(ERROR) << "av_alloc faield, for size: " << outBufferBytes; + return -1; + } + + uint8_t* outPlanes[AVRESAMPLE_MAX_CHANNELS] = {nullptr}; + + if ((result = preparePlanes( + params_.out.audio, tmpBuffer, outNumSamples, outPlanes)) < 0) { + av_free(tmpBuffer); + return result; + } + + if ((result = swr_convert( + swrContext_, + &outPlanes[0], + outNumSamples, + inPlanes, + inNumSamples)) < 0) { + LOG(ERROR) << "swr_convert faield, err: " + << Util::generateErrorDesc(result); + av_free(tmpBuffer); + return result; + } + + av_free(tmpBuffer); + + CHECK_LE(result, outNumSamples); + + if (result) { + result = av_samples_get_buffer_size( + nullptr, + params_.out.audio.channels, + result, + (AVSampleFormat)params_.out.audio.format, + 1); + } + } + + return result; +} + +int AudioSampler::sample(AVFrame* frame, ByteStorage* out) { + const auto outNumSamples = numOutputSamples(frame ? frame->nb_samples : 0); + + if (!outNumSamples) { + return 0; + } + + return sample( + frame ? (const uint8_t**)&frame->data[0] : nullptr, + frame ? frame->nb_samples : 0, + out, + outNumSamples); +} + +int AudioSampler::sample(const ByteStorage* in, ByteStorage* out) { + const auto inSampleSize = + av_get_bytes_per_sample((AVSampleFormat)params_.in.audio.format); + + const auto inNumSamples = + !in ? 0 : in->length() / inSampleSize / params_.in.audio.channels; + + const auto outNumSamples = numOutputSamples(inNumSamples); + + if (!outNumSamples) { + return 0; + } + + uint8_t* inPlanes[AVRESAMPLE_MAX_CHANNELS] = {nullptr}; + int result; + if (in && + (result = preparePlanes( + params_.in.audio, in->data(), inNumSamples, inPlanes)) < 0) { + return result; + } + + return sample( + in ? (const uint8_t**)inPlanes : nullptr, + inNumSamples, + out, + outNumSamples); +} + +void AudioSampler::cleanUp() { + if (swrContext_) { + swr_free(&swrContext_); + swrContext_ = nullptr; + } +} + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/audio_sampler.h b/torchvision/csrc/io/decoder/audio_sampler.h new file mode 100644 index 00000000000..e105bbe4de2 --- /dev/null +++ b/torchvision/csrc/io/decoder/audio_sampler.h @@ -0,0 +1,39 @@ +#pragma once + +#include "defs.h" + +namespace ffmpeg { + +/** + * Class transcode audio frames from one format into another + */ + +class AudioSampler : public MediaSampler { + public: + explicit AudioSampler(void* logCtx); + ~AudioSampler() override; + + // MediaSampler overrides + bool init(const SamplerParameters& params) override; + int sample(const ByteStorage* in, ByteStorage* out) override; + void shutdown() override; + + int sample(AVFrame* frame, ByteStorage* out); + + private: + // close resources + void cleanUp(); + // helper functions for rescaling, cropping, etc. + int numOutputSamples(int inSamples) const; + int sample( + const uint8_t* inPlanes[], + int inNumSamples, + ByteStorage* out, + int outNumSamples); + + private: + SwrContext* swrContext_{nullptr}; + void* logCtx_{nullptr}; +}; + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/audio_stream.cpp b/torchvision/csrc/io/decoder/audio_stream.cpp new file mode 100644 index 00000000000..9d66e589bf3 --- /dev/null +++ b/torchvision/csrc/io/decoder/audio_stream.cpp @@ -0,0 +1,101 @@ +#include "audio_stream.h" +#include +#include +#include "util.h" + +namespace ffmpeg { + +namespace { +bool operator==(const AudioFormat& x, const AVFrame& y) { + return x.samples == y.sample_rate && x.channels == y.channels && + x.format == y.format; +} + +bool operator==(const AudioFormat& x, const AVCodecContext& y) { + return x.samples == y.sample_rate && x.channels == y.channels && + x.format == y.sample_fmt; +} + +AudioFormat& toAudioFormat(AudioFormat& x, const AVFrame& y) { + x.samples = y.sample_rate; + x.channels = y.channels; + x.format = y.format; + return x; +} + +AudioFormat& toAudioFormat(AudioFormat& x, const AVCodecContext& y) { + x.samples = y.sample_rate; + x.channels = y.channels; + x.format = y.sample_fmt; + return x; +} +} // namespace + +AudioStream::AudioStream( + AVFormatContext* inputCtx, + int index, + bool convertPtsToWallTime, + const AudioFormat& format) + : Stream( + inputCtx, + MediaFormat::makeMediaFormat(format, index), + convertPtsToWallTime, + 0) {} + +AudioStream::~AudioStream() { + if (sampler_) { + sampler_->shutdown(); + sampler_.reset(); + } +} + +int AudioStream::initFormat() { + // set output format + if (format_.format.audio.samples == 0) { + format_.format.audio.samples = codecCtx_->sample_rate; + } + if (format_.format.audio.channels == 0) { + format_.format.audio.channels = codecCtx_->channels; + } + if (format_.format.audio.format == AV_SAMPLE_FMT_NONE) { + format_.format.audio.format = codecCtx_->sample_fmt; + } + + return format_.format.audio.samples != 0 && + format_.format.audio.channels != 0 && + format_.format.audio.format != AV_SAMPLE_FMT_NONE + ? 0 + : -1; +} + +int AudioStream::copyFrameBytes(ByteStorage* out, bool flush) { + if (!sampler_) { + sampler_ = std::make_unique(codecCtx_); + } + // check if input format gets changed + if (flush ? !(sampler_->getInputFormat().audio == *codecCtx_) + : !(sampler_->getInputFormat().audio == *frame_)) { + // - reinit sampler + SamplerParameters params; + params.type = format_.type; + params.out = format_.format; + params.in = FormatUnion(); + flush ? toAudioFormat(params.in.audio, *codecCtx_) + : toAudioFormat(params.in.audio, *frame_); + if (!sampler_->init(params)) { + return -1; + } + + VLOG(1) << "Set input audio sampler format" + << ", samples: " << params.in.audio.samples + << ", channels: " << params.in.audio.channels + << ", format: " << params.in.audio.format + << " : output audio sampler format" + << ", samples: " << format_.format.audio.samples + << ", channels: " << format_.format.audio.channels + << ", format: " << format_.format.audio.format; + } + return sampler_->sample(flush ? nullptr : frame_, out); +} + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/audio_stream.h b/torchvision/csrc/io/decoder/audio_stream.h new file mode 100644 index 00000000000..2d6457b68f5 --- /dev/null +++ b/torchvision/csrc/io/decoder/audio_stream.h @@ -0,0 +1,29 @@ +#pragma once + +#include "audio_sampler.h" +#include "stream.h" + +namespace ffmpeg { + +/** + * Class uses FFMPEG library to decode one audio stream. + */ + +class AudioStream : public Stream { + public: + AudioStream( + AVFormatContext* inputCtx, + int index, + bool convertPtsToWallTime, + const AudioFormat& format); + ~AudioStream() override; + + private: + int initFormat() override; + int copyFrameBytes(ByteStorage* out, bool flush) override; + + private: + std::unique_ptr sampler_; +}; + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/cc_stream.cpp b/torchvision/csrc/io/decoder/cc_stream.cpp new file mode 100644 index 00000000000..89174c396fd --- /dev/null +++ b/torchvision/csrc/io/decoder/cc_stream.cpp @@ -0,0 +1,24 @@ +#include "cc_stream.h" + +namespace ffmpeg { + +CCStream::CCStream( + AVFormatContext* inputCtx, + int index, + bool convertPtsToWallTime, + const SubtitleFormat& format) + : SubtitleStream(inputCtx, index, convertPtsToWallTime, format) { + format_.type = TYPE_CC; +} + +AVCodec* CCStream::findCodec(AVCodecParameters* params) { + if (params->codec_id == AV_CODEC_ID_BIN_DATA && + params->codec_type == AVMEDIA_TYPE_DATA) { + // obtain subtitles codec + params->codec_id = AV_CODEC_ID_MOV_TEXT; + params->codec_type = AVMEDIA_TYPE_SUBTITLE; + } + return Stream::findCodec(params); +} + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/cc_stream.h b/torchvision/csrc/io/decoder/cc_stream.h new file mode 100644 index 00000000000..3a1d169f014 --- /dev/null +++ b/torchvision/csrc/io/decoder/cc_stream.h @@ -0,0 +1,22 @@ +#pragma once + +#include "subtitle_stream.h" + +namespace ffmpeg { + +/** + * Class uses FFMPEG library to decode one closed captions stream. + */ +class CCStream : public SubtitleStream { + public: + CCStream( + AVFormatContext* inputCtx, + int index, + bool convertPtsToWallTime, + const SubtitleFormat& format); + + private: + AVCodec* findCodec(AVCodecParameters* params) override; +}; + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/decoder.cpp b/torchvision/csrc/io/decoder/decoder.cpp new file mode 100644 index 00000000000..6c9a3cdf825 --- /dev/null +++ b/torchvision/csrc/io/decoder/decoder.cpp @@ -0,0 +1,665 @@ +#include "decoder.h" +#include +#include +#include +#include +#include "audio_stream.h" +#include "cc_stream.h" +#include "subtitle_stream.h" +#include "util.h" +#include "video_stream.h" + +namespace ffmpeg { + +namespace { + +constexpr size_t kIoBufferSize = 96 * 1024; +constexpr size_t kIoPaddingSize = AV_INPUT_BUFFER_PADDING_SIZE; +constexpr size_t kLogBufferSize = 1024; + +int ffmpeg_lock(void** mutex, enum AVLockOp op) { + std::mutex** handle = (std::mutex**)mutex; + switch (op) { + case AV_LOCK_CREATE: + *handle = new std::mutex(); + break; + case AV_LOCK_OBTAIN: + (*handle)->lock(); + break; + case AV_LOCK_RELEASE: + (*handle)->unlock(); + break; + case AV_LOCK_DESTROY: + delete *handle; + break; + } + return 0; +} + +bool mapFfmpegType(AVMediaType media, MediaType* type) { + switch (media) { + case AVMEDIA_TYPE_AUDIO: + *type = TYPE_AUDIO; + return true; + case AVMEDIA_TYPE_VIDEO: + *type = TYPE_VIDEO; + return true; + case AVMEDIA_TYPE_SUBTITLE: + *type = TYPE_SUBTITLE; + return true; + case AVMEDIA_TYPE_DATA: + *type = TYPE_CC; + return true; + default: + return false; + } +} + +std::unique_ptr createStream( + MediaType type, + AVFormatContext* ctx, + int idx, + bool convertPtsToWallTime, + const FormatUnion& format, + int64_t loggingUuid) { + switch (type) { + case TYPE_AUDIO: + return std::make_unique( + ctx, idx, convertPtsToWallTime, format.audio); + case TYPE_VIDEO: + return std::make_unique( + // negative loggingUuid indicates video streams. + ctx, + idx, + convertPtsToWallTime, + format.video, + -loggingUuid); + case TYPE_SUBTITLE: + return std::make_unique( + ctx, idx, convertPtsToWallTime, format.subtitle); + case TYPE_CC: + return std::make_unique( + ctx, idx, convertPtsToWallTime, format.subtitle); + default: + return nullptr; + } +} + +} // Namespace + +/* static */ +void Decoder::logFunction(void* avcl, int level, const char* cfmt, va_list vl) { + if (!avcl) { + // Nothing can be done here + return; + } + + AVClass* avclass = *reinterpret_cast(avcl); + if (!avclass) { + // Nothing can be done here + return; + } + Decoder* decoder = nullptr; + if (strcmp(avclass->class_name, "AVFormatContext") == 0) { + AVFormatContext* context = reinterpret_cast(avcl); + if (context) { + decoder = reinterpret_cast(context->opaque); + } + } else if (strcmp(avclass->class_name, "AVCodecContext") == 0) { + AVCodecContext* context = reinterpret_cast(avcl); + if (context) { + decoder = reinterpret_cast(context->opaque); + } + } else if (strcmp(avclass->class_name, "AVIOContext") == 0) { + AVIOContext* context = reinterpret_cast(avcl); + // only if opaque was assigned to Decoder pointer + if (context && context->read_packet == Decoder::readFunction) { + decoder = reinterpret_cast(context->opaque); + } + } else if (strcmp(avclass->class_name, "SWResampler") == 0) { + // expect AVCodecContext as parent + if (avclass->parent_log_context_offset) { + AVClass** parent = + *(AVClass***)(((uint8_t*)avcl) + avclass->parent_log_context_offset); + AVCodecContext* context = reinterpret_cast(parent); + if (context) { + decoder = reinterpret_cast(context->opaque); + } + } + } else if (strcmp(avclass->class_name, "SWScaler") == 0) { + // cannot find a way to pass context pointer through SwsContext struct + } else { + VLOG(2) << "Unknown context class: " << avclass->class_name; + } + + if (decoder != nullptr && decoder->enableLogLevel(level)) { + char buf[kLogBufferSize] = {0}; + // Format the line + int* prefix = decoder->getPrintPrefix(); + *prefix = 1; + av_log_format_line(avcl, level, cfmt, vl, buf, sizeof(buf) - 1, prefix); + // pass message to the decoder instance + std::string msg(buf); + decoder->logCallback(level, msg); + } +} + +bool Decoder::enableLogLevel(int level) const { + return ssize_t(level) <= params_.logLevel; +} + +void Decoder::logCallback(int level, const std::string& message) { + LOG(INFO) << "Msg, uuid=" << params_.loggingUuid << " level=" << level + << " msg=" << message; +} + +/* static */ +int Decoder::shutdownFunction(void* ctx) { + Decoder* decoder = (Decoder*)ctx; + if (decoder == nullptr) { + return 1; + } + return decoder->shutdownCallback(); +} + +int Decoder::shutdownCallback() { + return interrupted_ ? 1 : 0; +} + +/* static */ +int Decoder::readFunction(void* opaque, uint8_t* buf, int size) { + Decoder* decoder = reinterpret_cast(opaque); + if (decoder == nullptr) { + return 0; + } + return decoder->readCallback(buf, size); +} + +/* static */ +int64_t Decoder::seekFunction(void* opaque, int64_t offset, int whence) { + Decoder* decoder = reinterpret_cast(opaque); + if (decoder == nullptr) { + return -1; + } + return decoder->seekCallback(offset, whence); +} + +int Decoder::readCallback(uint8_t* buf, int size) { + return seekableBuffer_.read(buf, size, params_.timeoutMs); +} + +int64_t Decoder::seekCallback(int64_t offset, int whence) { + return seekableBuffer_.seek(offset, whence, params_.timeoutMs); +} + +/* static */ +void Decoder::initOnce() { + static std::once_flag flagInit; + std::call_once(flagInit, []() { + av_register_all(); + avcodec_register_all(); + avformat_network_init(); + // register ffmpeg lock manager + av_lockmgr_register(&ffmpeg_lock); + av_log_set_callback(Decoder::logFunction); + av_log_set_level(AV_LOG_ERROR); + VLOG(1) << "Registered ffmpeg libs"; + }); +} + +Decoder::Decoder() { + initOnce(); +} + +Decoder::~Decoder() { + cleanUp(); +} + +bool Decoder::init( + const DecoderParameters& params, + DecoderInCallback&& in, + std::vector* metadata) { + cleanUp(); + + if ((params.uri.empty() || in) && (!params.uri.empty() || !in)) { + LOG(ERROR) + << "uuid=" << params_.loggingUuid + << " either external URI gets provided or explicit input callback"; + return false; + } + + // set callback and params + params_ = params; + + if (!(inputCtx_ = avformat_alloc_context())) { + LOG(ERROR) << "uuid=" << params_.loggingUuid + << " cannot allocate format context"; + return false; + } + + AVInputFormat* fmt = nullptr; + int result = 0; + if (in) { + ImageType type = ImageType::UNKNOWN; + if ((result = seekableBuffer_.init( + std::forward(in), + params_.timeoutMs, + params_.maxSeekableBytes, + params_.isImage ? &type : nullptr)) < 0) { + LOG(ERROR) << "uuid=" << params_.loggingUuid + << " can't initiate seekable buffer"; + cleanUp(); + return false; + } + + if (params_.isImage) { + const char* fmtName = "image2"; + switch (type) { + case ImageType::JPEG: + fmtName = "jpeg_pipe"; + break; + case ImageType::PNG: + fmtName = "png_pipe"; + break; + case ImageType::TIFF: + fmtName = "tiff_pipe"; + break; + default: + break; + } + + fmt = av_find_input_format(fmtName); + } + + const size_t avioCtxBufferSize = kIoBufferSize; + uint8_t* avioCtxBuffer = + (uint8_t*)av_malloc(avioCtxBufferSize + kIoPaddingSize); + if (!avioCtxBuffer) { + LOG(ERROR) << "uuid=" << params_.loggingUuid + << " av_malloc cannot allocate " << avioCtxBufferSize + << " bytes"; + cleanUp(); + return false; + } + + if (!(avioCtx_ = avio_alloc_context( + avioCtxBuffer, + avioCtxBufferSize, + 0, + reinterpret_cast(this), + &Decoder::readFunction, + nullptr, + result == 1 ? &Decoder::seekFunction : nullptr))) { + LOG(ERROR) << "uuid=" << params_.loggingUuid + << " avio_alloc_context failed"; + av_free(avioCtxBuffer); + cleanUp(); + return false; + } + + inputCtx_->pb = avioCtx_; + inputCtx_->flags |= AVFMT_FLAG_CUSTOM_IO; + } + + inputCtx_->opaque = reinterpret_cast(this); + inputCtx_->interrupt_callback.callback = Decoder::shutdownFunction; + inputCtx_->interrupt_callback.opaque = reinterpret_cast(this); + + // add network timeout + inputCtx_->flags |= AVFMT_FLAG_NONBLOCK; + + AVDictionary* options = nullptr; + if (params_.listen) { + av_dict_set_int(&options, "listen", 1, 0); + } + if (params_.timeoutMs > 0) { + av_dict_set_int(&options, "analyzeduration", params_.timeoutMs * 1000, 0); + av_dict_set_int(&options, "stimeout", params_.timeoutMs * 1000, 0); + av_dict_set_int(&options, "rw_timeout", params_.timeoutMs * 1000, 0); + if (!params_.tlsCertFile.empty()) { + av_dict_set(&options, "cert_file", params_.tlsCertFile.data(), 0); + } + if (!params_.tlsKeyFile.empty()) { + av_dict_set(&options, "key_file", params_.tlsKeyFile.data(), 0); + } + } + + interrupted_ = false; + + // ffmpeg avformat_open_input call can hang if media source doesn't respond + // set a guard for handle such situations, if requested + std::promise p; + std::future f = p.get_future(); + std::unique_ptr guard; + if (params_.preventStaleness) { + guard = std::make_unique([&f, this]() { + auto timeout = std::chrono::milliseconds(params_.timeoutMs); + if (std::future_status::timeout == f.wait_for(timeout)) { + LOG(ERROR) << "uuid=" << params_.loggingUuid + << " cannot open stream within " << params_.timeoutMs + << " ms"; + interrupted_ = true; + } + }); + } + + if (fmt) { + result = avformat_open_input(&inputCtx_, nullptr, fmt, &options); + } else { + result = + avformat_open_input(&inputCtx_, params_.uri.c_str(), nullptr, &options); + } + + av_dict_free(&options); + + if (guard) { + p.set_value(true); + guard->join(); + guard.reset(); + } + + if (result < 0 || interrupted_) { + LOG(ERROR) << "uuid=" << params_.loggingUuid + << " avformat_open_input failed, error=" + << Util::generateErrorDesc(result); + cleanUp(); + return false; + } + + result = avformat_find_stream_info(inputCtx_, nullptr); + + if (result < 0) { + LOG(ERROR) << "uuid=" << params_.loggingUuid + << " avformat_find_stream_info failed, error=" + << Util::generateErrorDesc(result); + cleanUp(); + return false; + } + + if (!openStreams(metadata)) { + LOG(ERROR) << "uuid=" << params_.loggingUuid << " cannot activate streams"; + cleanUp(); + return false; + } + + onInit(); + + if (params.startOffset != 0) { + auto offset = params.startOffset <= params.seekAccuracy + ? 0 + : params.startOffset - params.seekAccuracy; + + av_seek_frame(inputCtx_, -1, offset, AVSEEK_FLAG_BACKWARD); + } + + VLOG(1) << "Decoder initialized, log level: " << params_.logLevel; + return true; +} + +bool Decoder::openStreams(std::vector* metadata) { + for (int i = 0; i < inputCtx_->nb_streams; i++) { + // - find the corespondent format at params_.formats set + MediaFormat format; + const auto media = inputCtx_->streams[i]->codec->codec_type; + if (!mapFfmpegType(media, &format.type)) { + VLOG(1) << "Stream media: " << media << " at index " << i + << " gets ignored, unknown type"; + + continue; // unsupported type + } + + // check format + auto it = params_.formats.find(format); + if (it == params_.formats.end()) { + VLOG(1) << "Stream type: " << format.type << " at index: " << i + << " gets ignored, caller is not interested"; + continue; // clients don't care about this media format + } + + // do we have stream of this type? + auto stream = findByType(format); + + // should we process this stream? + + if (it->stream == -2 || // all streams of this type are welcome + (!stream && (it->stream == -1 || it->stream == i))) { // new stream + VLOG(1) << "Stream type: " << format.type << " found, at index: " << i; + auto stream = createStream( + format.type, + inputCtx_, + i, + params_.convertPtsToWallTime, + it->format, + params_.loggingUuid); + CHECK(stream); + if (stream->openCodec(metadata) < 0) { + LOG(ERROR) << "uuid=" << params_.loggingUuid + << " open codec failed, stream_idx=" << i; + return false; + } + streams_.emplace(i, std::move(stream)); + inRange_.set(i, true); + } + } + + return true; +} + +void Decoder::shutdown() { + cleanUp(); +} + +void Decoder::interrupt() { + interrupted_ = true; +} + +void Decoder::cleanUp() { + if (!interrupted_) { + interrupted_ = true; + } + + if (inputCtx_) { + for (auto& stream : streams_) { + // Drain stream buffers. + DecoderOutputMessage msg; + while (msg.payload = nullptr, stream.second->flush(&msg, true) > 0) { + } + stream.second.reset(); + } + streams_.clear(); + avformat_close_input(&inputCtx_); + } + if (avioCtx_) { + av_freep(&avioCtx_->buffer); + av_freep(&avioCtx_); + } + + // reset callback + seekableBuffer_.shutdown(); +} + +int Decoder::getFrame(size_t workingTimeInMs) { + if (inRange_.none()) { + return ENODATA; + } + // decode frames until cache is full and leave thread + // once decode() method gets called and grab some bytes + // run this method again + // init package + AVPacket avPacket; + av_init_packet(&avPacket); + avPacket.data = nullptr; + avPacket.size = 0; + + auto end = std::chrono::steady_clock::now() + + std::chrono::milliseconds(workingTimeInMs); + // return true if elapsed time less than timeout + auto watcher = [end]() -> bool { + return std::chrono::steady_clock::now() <= end; + }; + + int result = 0; + size_t decodingErrors = 0; + bool decodedFrame = false; + while (!interrupted_ && inRange_.any() && !decodedFrame && watcher()) { + result = av_read_frame(inputCtx_, &avPacket); + if (result == AVERROR(EAGAIN)) { + VLOG(4) << "Decoder is busy..."; + std::this_thread::yield(); + result = 0; // reset error, EAGAIN is not an error at all + continue; + } else if (result == AVERROR_EOF) { + flushStreams(); + VLOG(1) << "End of stream"; + result = ENODATA; + break; + } else if (result < 0) { + flushStreams(); + LOG(ERROR) << "Error detected: " << Util::generateErrorDesc(result); + break; + } + + // get stream + auto stream = findByIndex(avPacket.stream_index); + if (stream == nullptr || !inRange_.test(stream->getIndex())) { + av_packet_unref(&avPacket); + continue; + } + + size_t numConsecutiveNoBytes = 0; + // it can be only partial decoding of the package bytes + do { + // decode package + bool gotFrame = false; + bool hasMsg = false; + // packet either got consumed completely or not at all + if ((result = processPacket(stream, &avPacket, &gotFrame, &hasMsg)) < 0) { + LOG(ERROR) << "uuid=" << params_.loggingUuid + << " processPacket failed with code=" << result; + break; + } + + if (!gotFrame && params_.maxProcessNoBytes != 0 && + ++numConsecutiveNoBytes > params_.maxProcessNoBytes) { + LOG(ERROR) << "uuid=" << params_.loggingUuid + << " exceeding max amount of consecutive no bytes"; + break; + } + if (result > 0) { + numConsecutiveNoBytes = 0; + } + + decodedFrame |= hasMsg; + } while (result == 0); + + // post loop check + if (result < 0) { + if (params_.maxPackageErrors != 0 && // check errors + ++decodingErrors >= params_.maxPackageErrors) { // reached the limit + LOG(ERROR) << "uuid=" << params_.loggingUuid + << " exceeding max amount of consecutive package errors"; + break; + } + } else { + decodingErrors = 0; // reset on success + } + + result = 0; + + av_packet_unref(&avPacket); + } + + av_packet_unref(&avPacket); + + VLOG(2) << "Interrupted loop" + << ", interrupted_ " << interrupted_ << ", inRange_.any() " + << inRange_.any() << ", decodedFrame " << decodedFrame << ", result " + << result; + + // loop can be terminated, either by: + // 1. explcitly iterrupted + // 2. terminated by workable timeout + // 3. unrecoverable error or ENODATA (end of stream) + // 4. decoded frames pts are out of the specified range + // 5. success decoded frame + if (interrupted_) { + return EINTR; + } + if (result != 0) { + return result; + } + if (inRange_.none()) { + return ENODATA; + } + return 0; +} + +Stream* Decoder::findByIndex(int streamIndex) const { + auto it = streams_.find(streamIndex); + return it != streams_.end() ? it->second.get() : nullptr; +} + +Stream* Decoder::findByType(const MediaFormat& format) const { + for (auto& stream : streams_) { + if (stream.second->getMediaFormat().type == format.type) { + return stream.second.get(); + } + } + return nullptr; +} + +int Decoder::processPacket( + Stream* stream, + AVPacket* packet, + bool* gotFrame, + bool* hasMsg) { + // decode package + int result; + DecoderOutputMessage msg; + msg.payload = params_.headerOnly ? nullptr : createByteStorage(0); + *hasMsg = false; + if ((result = stream->decodePacket( + packet, &msg, params_.headerOnly, gotFrame)) >= 0 && + *gotFrame) { + // check end offset + bool endInRange = + params_.endOffset <= 0 || msg.header.pts <= params_.endOffset; + inRange_.set(stream->getIndex(), endInRange); + if (endInRange && msg.header.pts >= params_.startOffset) { + *hasMsg = true; + push(std::move(msg)); + } + } + return result; +} + +void Decoder::flushStreams() { + VLOG(1) << "Flushing streams..."; + for (auto& stream : streams_) { + DecoderOutputMessage msg; + while (msg.payload = (params_.headerOnly ? nullptr : createByteStorage(0)), + stream.second->flush(&msg, params_.headerOnly) > 0) { + // check end offset + bool endInRange = + params_.endOffset <= 0 || msg.header.pts <= params_.endOffset; + inRange_.set(stream.second->getIndex(), endInRange); + if (endInRange && msg.header.pts >= params_.startOffset) { + push(std::move(msg)); + } else { + msg.payload.reset(); + } + } + } +} + +int Decoder::decode_all(const DecoderOutCallback& callback) { + int result; + do { + DecoderOutputMessage out; + if (0 == (result = decode(&out, params_.timeoutMs))) { + callback(std::move(out)); + } + } while (result == 0); + return result; +} +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/decoder.h b/torchvision/csrc/io/decoder/decoder.h new file mode 100644 index 00000000000..c2d8f163bc3 --- /dev/null +++ b/torchvision/csrc/io/decoder/decoder.h @@ -0,0 +1,92 @@ +#pragma once + +#include +#include +#include "seekable_buffer.h" +#include "stream.h" + +#if defined(_MSC_VER) +#include +using ssize_t = SSIZE_T; +#endif + +namespace ffmpeg { + +/** + * Class uses FFMPEG library to decode media streams. + * Media bytes can be explicitly provided through read-callback + * or fetched internally by FFMPEG library + */ +class Decoder : public MediaDecoder { + public: + Decoder(); + ~Decoder() override; + + // MediaDecoder overrides + bool init( + const DecoderParameters& params, + DecoderInCallback&& in, + std::vector* metadata) override; + int decode_all(const DecoderOutCallback& callback) override; + void shutdown() override; + void interrupt() override; + + protected: + // function does actual work, derived class calls it in working thread + // periodically. On success method returns 0, ENOADATA on EOF, ETIMEDOUT if + // no frames got decoded in the specified timeout time, and error on + // unrecoverable error. + int getFrame(size_t workingTimeInMs = 100); + + // Derived class must override method and consume the provided message + virtual void push(DecoderOutputMessage&& buffer) = 0; + + // Fires on init call + virtual void onInit() {} + + public: + // C-style FFMPEG API requires C/static methods for callbacks + static void logFunction(void* avcl, int level, const char* cfmt, va_list vl); + static int shutdownFunction(void* ctx); + static int readFunction(void* opaque, uint8_t* buf, int size); + static int64_t seekFunction(void* opaque, int64_t offset, int whence); + // can be called by any classes or API + static void initOnce(); + + int* getPrintPrefix() { + return &printPrefix; + } + + private: + // mark below function for a proper invocation + virtual bool enableLogLevel(int level) const; + virtual void logCallback(int level, const std::string& message); + virtual int readCallback(uint8_t* buf, int size); + virtual int64_t seekCallback(int64_t offset, int whence); + virtual int shutdownCallback(); + + bool openStreams(std::vector* metadata); + Stream* findByIndex(int streamIndex) const; + Stream* findByType(const MediaFormat& format) const; + int processPacket( + Stream* stream, + AVPacket* packet, + bool* gotFrame, + bool* hasMsg); + void flushStreams(); + void cleanUp(); + + protected: + DecoderParameters params_; + + private: + SeekableBuffer seekableBuffer_; + int printPrefix{1}; + + std::atomic interrupted_{false}; + AVFormatContext* inputCtx_{nullptr}; + AVIOContext* avioCtx_{nullptr}; + std::unordered_map> streams_; + std::bitset<64> inRange_; +}; +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/defs.h b/torchvision/csrc/io/decoder/defs.h new file mode 100644 index 00000000000..b828934bdf0 --- /dev/null +++ b/torchvision/csrc/io/decoder/defs.h @@ -0,0 +1,390 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +extern "C" { +#include +#include +#include +#include +#include +#include +#include "libswscale/swscale.h" +} + +namespace ffmpeg { + +// bit mask of formats, keep them in form 2^n +enum MediaType : size_t { + TYPE_AUDIO = 1, + TYPE_VIDEO = 2, + TYPE_SUBTITLE = 4, + TYPE_CC = 8, // closed captions from transport streams +}; + +// audio +struct AudioFormat { + // fields are initialized for the auto detection + // caller can specify some/all of field values if specific output is desirable + bool operator==(const AudioFormat& x) const { + return x.format == format && x.samples == samples && x.channels == channels; + } + + size_t samples{0}; // number samples per second (frequency) + size_t channels{0}; // number of channels + long format{-1}; // AVSampleFormat, auto AV_SAMPLE_FMT_NONE + size_t padding[2]; + // -- alignment 40 bytes +}; + +// video +struct VideoFormat { + // fields are initialized for the auto detection + // caller can specify some/all of field values if specific output is desirable + bool operator==(const VideoFormat& x) const { + return x.format == format && x.width == width && x.height == height; + } + /* + When width = 0, height = 0, minDimension = 0, and maxDimension = 0, + keep the original frame resolution + When width = 0, height = 0, minDimension != 0, and maxDimension = 0, + keep the aspect ratio and resize the frame so that shorter edge size is + minDimension + When width = 0, height = 0, minDimension = 0, and maxDimension != 0, + keep the aspect ratio and resize the frame so that longer edge size is + maxDimension + When width = 0, height = 0, minDimension != 0, and maxDimension != 0, + resize the frame so that shorter edge size is minDimension, and + longer edge size is maxDimension. The aspect ratio may not be preserved + When width = 0, height != 0, minDimension = 0, and maxDimension = 0, + keep the aspect ratio and resize the frame so that frame height is $height + When width != 0, height = 0, minDimension = 0, and maxDimension = 0, + keep the aspect ratio and resize the frame so that frame width is $width + When width != 0, height != 0, minDimension = 0, and maxDimension = 0, + resize the frame so that frame width and height are set to $width and + $height, + respectively + */ + size_t width{0}; // width in pixels + size_t height{0}; // height in pixels + long format{-1}; // AVPixelFormat, auto AV_PIX_FMT_NONE + size_t minDimension{0}; // choose min dimension and rescale accordingly + size_t maxDimension{0}; // choose max dimension and rescale accordingly + size_t cropImage{0}; // request image crop + // -- alignment 40 bytes +}; + +// subtitle/cc +struct SubtitleFormat { + long type{0}; // AVSubtitleType, auto SUBTITLE_NONE + size_t padding[4]; + // -- alignment 40 bytes +}; + +union FormatUnion { + FormatUnion() : audio() {} + explicit FormatUnion(int) : video() {} + explicit FormatUnion(char) : subtitle() {} + explicit FormatUnion(double) : subtitle() {} + AudioFormat audio; + VideoFormat video; + SubtitleFormat subtitle; + // -- alignment 40 bytes +}; + +/* + MediaFormat data structure serves as input/output parameter. + Caller assigns values for input formats + or leave default values for auto detection + For output formats all fields will be set to the specific values +*/ +struct MediaFormat { + // for using map/set data structures + bool operator<(const MediaFormat& x) const { + return type < x.type; + } + bool operator==(const MediaFormat& x) const { + if (type != x.type) { + return false; + } + switch (type) { + case TYPE_AUDIO: + return format.audio == x.format.audio; + case TYPE_VIDEO: + return format.video == x.format.video; + case TYPE_SUBTITLE: + case TYPE_CC: + return true; + default: + return false; + } + } + + explicit MediaFormat(long s = -1) : type(TYPE_AUDIO), stream(s), format() {} + explicit MediaFormat(int x, long s = -1) + : type(TYPE_VIDEO), stream(s), format(x) {} + explicit MediaFormat(char x, long s = -1) + : type(TYPE_SUBTITLE), stream(s), format(x) {} + explicit MediaFormat(double x, long s = -1) + : type(TYPE_CC), stream(s), format(x) {} + + static MediaFormat makeMediaFormat(AudioFormat format, long stream) { + MediaFormat result(stream); + result.format.audio = format; + return result; + } + + static MediaFormat makeMediaFormat(VideoFormat format, long stream) { + MediaFormat result(0, stream); + result.format.video = format; + return result; + } + + static MediaFormat makeMediaFormat(SubtitleFormat format, long stream) { + MediaFormat result('0', stream); + result.format.subtitle = format; + return result; + } + + // format type + MediaType type; + // stream index: + // set -1 for one stream auto detection, -2 for all streams auto detection, + // >= 0, specified stream, if caller knows the stream index (unlikely) + long stream; + // union keeps one of the possible formats, defined by MediaType + FormatUnion format; +}; + +struct DecoderParameters { + // local file, remote file, http url, rtmp stream uri, etc. anything that + // ffmpeg can recognize + std::string uri; + // timeout on getting bytes for decoding + size_t timeoutMs{1000}; + // logging level, default AV_LOG_PANIC + long logLevel{0}; + // when decoder would give up, 0 means never + size_t maxPackageErrors{0}; + // max allowed consecutive times no bytes are processed. 0 means for infinite. + size_t maxProcessNoBytes{0}; + // start offset (us) + long startOffset{0}; + // end offset (us) + long endOffset{-1}; + // logging id + int64_t loggingUuid{0}; + // internal max seekable buffer size + size_t maxSeekableBytes{0}; + // adjust header pts to the epoch time + bool convertPtsToWallTime{false}; + // indicate if input stream is an encoded image + bool isImage{false}; + // listen and wait for new rtmp stream + bool listen{false}; + // don't copy frame body, only header + bool headerOnly{false}; + // interrupt init method on timeout + bool preventStaleness{true}; + // seek tolerated accuracy (us) + double seekAccuracy{1000000.0}; + // what media types should be processed, default none + std::set formats; + + // can be used for asynchronous decoders + size_t cacheSize{8192}; // mow many bytes to cache before stop reading bytes + size_t cacheTimeoutMs{1000}; // timeout on bytes writing + bool enforceCacheSize{false}; // drop output frames if cache is full + bool mergeAudioMessages{false}; // combine collocated audio messages together + + std::string tlsCertFile; + std::string tlsKeyFile; +}; + +struct DecoderHeader { + // message id, from 0 till ... + size_t seqno{0}; + // decoded timestamp in microseconds from either beginning of the stream or + // from epoch time, see DecoderParameters::convertPtsToWallTime + long pts{0}; + // decoded key frame + size_t keyFrame{0}; + // frames per second, valid only for video streams + double fps{0}; + // format specifies what kind frame is in a payload + MediaFormat format; +}; + +// Abstract interface ByteStorage class +class ByteStorage { + public: + virtual ~ByteStorage() = default; + // makes sure that buffer has at least n bytes available for writing, if not + // storage must reallocate memory. + virtual void ensure(size_t n) = 0; + // caller must not to write more than available bytes + virtual uint8_t* writableTail() = 0; + // caller confirms that n bytes were written to the writable tail + virtual void append(size_t n) = 0; + // caller confirms that n bytes were read from the read buffer + virtual void trim(size_t n) = 0; + // gives an access to the beginning of the read buffer + virtual const uint8_t* data() const = 0; + // returns the stored size in bytes + virtual size_t length() const = 0; + // returns available capacity for writable tail + virtual size_t tail() const = 0; + // clears content, keeps capacity + virtual void clear() = 0; +}; + +struct DecoderOutputMessage { + DecoderHeader header; + std::unique_ptr payload; +}; + +/* + * External provider of the ecnoded bytes, specific implementation is left for + * different use cases, like file, memory, external network end-points, etc. + * Normally input/output parameter @out set to valid, not null buffer pointer, + * which indicates "read" call, however there are "seek" modes as well. + + * @out != nullptr => read from the current offset, @whence got ignored, + * @size bytes to read => return number bytes got read, 0 if no more bytes + * available, < 0 on error. + + * @out == nullptr, @timeoutMs == 0 => does provider support "seek" + * capability in a first place? @size & @whence got ignored, return 0 on + * success, < 0 if "seek" mode is not supported. + + * @out == nullptr, @timeoutMs != 0 => normal seek call + * offset == @size, i.e. @whence = [SEEK_SET, SEEK_CUR, SEEK_END, AVSEEK_SIZE) + * return < 0 on error, position if @whence = [SEEK_SET, SEEK_CUR, SEEK_END], + * length of buffer if @whence = [AVSEEK_SIZE]. + */ +using DecoderInCallback = + std::function; + +using DecoderOutCallback = std::function; + +struct DecoderMetadata { + // time base numerator + long num{0}; + // time base denominator + long den{1}; + // duration of the stream, in miscroseconds, if available + long duration{-1}; + // frames per second, valid only for video streams + double fps{0}; + // format specifies what kind frame is in a payload + MediaFormat format; +}; +/** + * Abstract class for decoding media bytes + * It has two diffrent modes. Internal media bytes retrieval for given uri and + * external media bytes provider in case of memory streams + */ +class MediaDecoder { + public: + virtual ~MediaDecoder() = default; + + /** + * Initializes media decoder with parameters, + * calls callback when media bytes are available. + * Media bytes get fetched internally from provided URI + * or invokes provided input callback to get media bytes. + * Input callback must be empty for the internal media provider + * Caller can provide non-null pointer for the input container + * if headers to obtain the streams metadata (optional) + */ + virtual bool init( + const DecoderParameters& params, + DecoderInCallback&& in, + std::vector* metadata) = 0; + + /** + * Polls available decoded one frame from decoder + * Returns error code, 0 - for success + */ + virtual int decode(DecoderOutputMessage* out, uint64_t timeoutMs) = 0; + + /** + * Polls available decoded bytes from decoder, till EOF or error + */ + virtual int decode_all(const DecoderOutCallback& callback) = 0; + + /** + * Stops calling callback, releases resources + */ + virtual void shutdown() = 0; + + /** + * Interrupts whatever decoder is doing at any time + */ + virtual void interrupt() = 0; + + /** + * Factory to create ByteStorage class instances, particular implementation is + * left to the derived class. Caller provides the initially allocated size + */ + virtual std::unique_ptr createByteStorage(size_t n) = 0; +}; + +struct SamplerParameters { + MediaType type{TYPE_AUDIO}; + FormatUnion in; + FormatUnion out; + int64_t loggingUuid{0}; +}; + +/** + * Abstract class for sampling media bytes + */ +class MediaSampler { + public: + virtual ~MediaSampler() = default; + + /** + * Initializes media sampler with parameters + */ + virtual bool init(const SamplerParameters& params) = 0; + + /** + * Samples media bytes + * Returns error code < 0, or >=0 - for success, indicating number of bytes + * processed. + * set @in to null for flushing data + */ + virtual int sample(const ByteStorage* in, ByteStorage* out) = 0; + + /** + * Releases resources + */ + virtual void shutdown() = 0; + + /* + * Returns media type + */ + MediaType getMediaType() const { + return params_.type; + } + /* + * Returns formats + */ + FormatUnion getInputFormat() const { + return params_.in; + } + FormatUnion getOutFormat() const { + return params_.out; + } + + protected: + SamplerParameters params_; +}; +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/memory_buffer.cpp b/torchvision/csrc/io/decoder/memory_buffer.cpp new file mode 100644 index 00000000000..a7b0128e3ed --- /dev/null +++ b/torchvision/csrc/io/decoder/memory_buffer.cpp @@ -0,0 +1,71 @@ +#include "memory_buffer.h" +#include + +namespace ffmpeg { + +MemoryBuffer::MemoryBuffer(const uint8_t* buffer, size_t size) + : buffer_(buffer), len_(size) {} + +int MemoryBuffer::read(uint8_t* buf, int size) { + if (pos_ < len_) { + auto available = std::min(int(len_ - pos_), size); + memcpy(buf, buffer_ + pos_, available); + pos_ += available; + return available; + } + + return 0; +} + +int64_t MemoryBuffer::seek(int64_t offset, int whence) { + if (whence & AVSEEK_SIZE) { + return len_; + } + + // remove force flag + whence &= ~AVSEEK_FORCE; + + switch (whence) { + case SEEK_SET: + if (offset >= 0 && offset <= len_) { + pos_ = offset; + } + break; + case SEEK_END: + if (len_ + offset >= 0 && len_ + offset <= len_) { + pos_ = len_ + offset; + } + break; + case SEEK_CUR: + if (pos_ + offset > 0 && pos_ + offset <= len_) { + pos_ += offset; + } + break; + default: + LOG(ERROR) << "Unknown whence flag gets provided: " << whence; + } + return pos_; +} + +/* static */ +DecoderInCallback MemoryBuffer::getCallback( + const uint8_t* buffer, + size_t size) { + MemoryBuffer object(buffer, size); + return + [object](uint8_t* out, int size, int whence, uint64_t timeoutMs) mutable + -> int { + if (out) { // see defs.h file + // read mode + return object.read(out, size); + } + // seek mode + if (!timeoutMs) { + // seek capabilty, yes - supported + return 0; + } + return object.seek(size, whence); + }; +} + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/memory_buffer.h b/torchvision/csrc/io/decoder/memory_buffer.h new file mode 100644 index 00000000000..909626d3cae --- /dev/null +++ b/torchvision/csrc/io/decoder/memory_buffer.h @@ -0,0 +1,25 @@ +#pragma once + +#include "defs.h" + +namespace ffmpeg { + +/** + * Class uses external memory buffer and implements a seekable interface. + */ +class MemoryBuffer { + public: + explicit MemoryBuffer(const uint8_t* buffer, size_t size); + int64_t seek(int64_t offset, int whence); + int read(uint8_t* buf, int size); + + // static constructor for decoder callback. + static DecoderInCallback getCallback(const uint8_t* buffer, size_t size); + + private: + const uint8_t* buffer_; // set at construction time + long pos_{0}; // current position + long len_{0}; // bytes in buffer +}; + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/seekable_buffer.cpp b/torchvision/csrc/io/decoder/seekable_buffer.cpp new file mode 100644 index 00000000000..41e3e689c7b --- /dev/null +++ b/torchvision/csrc/io/decoder/seekable_buffer.cpp @@ -0,0 +1,139 @@ +#include "seekable_buffer.h" +#include +#include +#include "memory_buffer.h" + +namespace ffmpeg { + +int SeekableBuffer::init( + DecoderInCallback&& in, + uint64_t timeoutMs, + size_t maxSeekableBytes, + ImageType* type) { + shutdown(); + isSeekable_ = in(nullptr, 0, 0, 0) == 0; + if (isSeekable_) { // seekable + if (type) { + if (!readBytes(in, 8, timeoutMs)) { + return -1; + } + setImageType(type); + end_ = 0; + eof_ = false; + std::vector().swap(buffer_); + // reset callback + if (in(nullptr, 0, SEEK_SET, timeoutMs)) { + return -1; + } + } + inCallback_ = std::forward(in); + return 1; + } + + if (!readBytes(in, maxSeekableBytes + (type ? 8 : 0), timeoutMs)) { + return -1; + } + + if (type) { + setImageType(type); + } + + if (eof_) { + end_ = 0; + eof_ = false; + // reuse MemoryBuffer functionality + inCallback_ = MemoryBuffer::getCallback(buffer_.data(), buffer_.size()); + isSeekable_ = true; + return 1; + } + inCallback_ = std::forward(in); + return 0; +} + +bool SeekableBuffer::readBytes( + DecoderInCallback& in, + size_t maxBytes, + uint64_t timeoutMs) { + // Resize to th minimum 4K page or less + buffer_.resize(std::min(maxBytes, size_t(4 * 1024UL))); + end_ = 0; + eof_ = false; + + auto end = + std::chrono::steady_clock::now() + std::chrono::milliseconds(timeoutMs); + auto watcher = [end]() -> bool { + return std::chrono::steady_clock::now() <= end; + }; + + bool hasTime = true; + while (!eof_ && end_ < maxBytes && (hasTime = watcher())) { + // lets read all bytes into available buffer + auto res = in(buffer_.data() + end_, buffer_.size() - end_, 0, timeoutMs); + if (res > 0) { + end_ += res; + if (end_ == buffer_.size()) { + buffer_.resize(std::min(size_t(end_ * 4UL), maxBytes)); + } + } else if (res == 0) { + eof_ = true; + } else { + // error + return false; + } + } + + buffer_.resize(end_); + + return hasTime; +} + +void SeekableBuffer::setImageType(ImageType* type) { + if (buffer_.size() > 2 && buffer_[0] == 0xFF && buffer_[1] == 0xD8 && + buffer_[2] == 0xFF) { + *type = ImageType::JPEG; + } else if ( + buffer_.size() > 3 && buffer_[1] == 'P' && buffer_[2] == 'N' && + buffer_[3] == 'G') { + *type = ImageType::PNG; + } else if ( + buffer_.size() > 1 && + ((buffer_[0] == 0x49 && buffer_[1] == 0x49) || + (buffer_[0] == 0x4D && buffer_[1] == 0x4D))) { + *type = ImageType::TIFF; + } else { + *type = ImageType::UNKNOWN; + } +} + +int SeekableBuffer::read(uint8_t* buf, int size, uint64_t timeoutMs) { + if (isSeekable_) { + return inCallback_(buf, size, 0, timeoutMs); + } + if (pos_ < end_) { + // read cached bytes for non-seekable callback + auto available = std::min(int(end_ - pos_), size); + memcpy(buf, buffer_.data() + pos_, available); + pos_ += available; + return available; + } else if (!eof_) { + // normal sequential read (see defs.h file), i.e. @buf != null + auto res = inCallback_(buf, size, 0, timeoutMs); // read through + eof_ = res == 0; + return res; + } else { + return 0; + } +} + +int64_t SeekableBuffer::seek(int64_t offset, int whence, uint64_t timeoutMs) { + return inCallback_(nullptr, offset, whence, timeoutMs); +} + +void SeekableBuffer::shutdown() { + pos_ = end_ = 0; + eof_ = false; + std::vector().swap(buffer_); + inCallback_ = nullptr; +} + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/seekable_buffer.h b/torchvision/csrc/io/decoder/seekable_buffer.h new file mode 100644 index 00000000000..9d5729f5306 --- /dev/null +++ b/torchvision/csrc/io/decoder/seekable_buffer.h @@ -0,0 +1,45 @@ +#pragma once + +#include "defs.h" + +namespace ffmpeg { + +/** + * Class uses internal buffer to store initial size bytes as a seekable cache + * from Media provider and let ffmpeg to seek and read bytes from cache + * and beyond - reading bytes directly from Media provider + */ +enum class ImageType { + UNKNOWN = 0, + JPEG = 1, + PNG = 2, + TIFF = 3, +}; + +class SeekableBuffer { + public: + // @type is optional, not nullptr only is image detection required + // \returns 1 is buffer seekable, 0 - if not seekable, < 0 on error + int init( + DecoderInCallback&& in, + uint64_t timeoutMs, + size_t maxSeekableBytes, + ImageType* type); + int read(uint8_t* buf, int size, uint64_t timeoutMs); + int64_t seek(int64_t offset, int whence, uint64_t timeoutMs); + void shutdown(); + + private: + bool readBytes(DecoderInCallback& in, size_t maxBytes, uint64_t timeoutMs); + void setImageType(ImageType* type); + + private: + DecoderInCallback inCallback_; + std::vector buffer_; // resized at init time + long pos_{0}; // current position (SEEK_CUR iff pos_ < end_) + long end_{0}; // current buffer size + bool eof_{0}; // indicates the EOF + bool isSeekable_{false}; // is callback seekable +}; + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/stream.cpp b/torchvision/csrc/io/decoder/stream.cpp new file mode 100644 index 00000000000..4da48647382 --- /dev/null +++ b/torchvision/csrc/io/decoder/stream.cpp @@ -0,0 +1,258 @@ +#include "stream.h" +#include +#include "util.h" + +namespace ffmpeg { +const AVRational timeBaseQ = AVRational{1, AV_TIME_BASE}; + +Stream::Stream( + AVFormatContext* inputCtx, + MediaFormat format, + bool convertPtsToWallTime, + int64_t loggingUuid) + : inputCtx_(inputCtx), + format_(format), + convertPtsToWallTime_(convertPtsToWallTime), + loggingUuid_(loggingUuid) {} + +Stream::~Stream() { + if (frame_) { + av_free(frame_); // Copyright 2004-present Facebook. All Rights Reserved. + } + if (codecCtx_) { + avcodec_free_context(&codecCtx_); + } +} + +AVCodec* Stream::findCodec(AVCodecParameters* params) { + return avcodec_find_decoder(params->codec_id); +} + +int Stream::openCodec(std::vector* metadata) { + AVStream* steam = inputCtx_->streams[format_.stream]; + + AVCodec* codec = findCodec(steam->codecpar); + if (!codec) { + LOG(ERROR) << "LoggingUuid #" << loggingUuid_ + << ", avcodec_find_decoder failed for codec_id: " + << int(steam->codecpar->codec_id); + return AVERROR(EINVAL); + } + + if (!(codecCtx_ = avcodec_alloc_context3(codec))) { + LOG(ERROR) << "LoggingUuid #" << loggingUuid_ + << ", avcodec_alloc_context3 failed"; + return AVERROR(ENOMEM); + } + + int ret; + // Copy codec parameters from input stream to output codec context + if ((ret = avcodec_parameters_to_context(codecCtx_, steam->codecpar)) < 0) { + LOG(ERROR) << "LoggingUuid #" << loggingUuid_ + << ", avcodec_parameters_to_context failed"; + return ret; + } + + // after avcodec_open2, value of codecCtx_->time_base is NOT meaningful + if ((ret = avcodec_open2(codecCtx_, codec, nullptr)) < 0) { + LOG(ERROR) << "LoggingUuid #" << loggingUuid_ + << ", avcodec_open2 failed: " << Util::generateErrorDesc(ret); + avcodec_free_context(&codecCtx_); + codecCtx_ = nullptr; + return ret; + } + + frame_ = av_frame_alloc(); + + switch (format_.type) { + case TYPE_VIDEO: + fps_ = av_q2d(av_guess_frame_rate(inputCtx_, steam, nullptr)); + break; + case TYPE_AUDIO: + fps_ = codecCtx_->sample_rate; + break; + default: + fps_ = 30.0; + } + + if ((ret = initFormat())) { + LOG(ERROR) << "initFormat failed, type: " << format_.type; + } + + if (metadata) { + DecoderMetadata header; + header.format = format_; + header.fps = fps_; + header.num = steam->time_base.num; + header.den = steam->time_base.den; + header.duration = + av_rescale_q(steam->duration, steam->time_base, timeBaseQ); + metadata->push_back(header); + } + + return ret; +} + +int Stream::analyzePacket(const AVPacket* packet, bool* gotFrame) { + int consumed = 0; + int result = avcodec_send_packet(codecCtx_, packet); + if (result == AVERROR(EAGAIN)) { + *gotFrame = false; // no bytes get consumed, fetch frame + } else if (result == AVERROR_EOF) { + *gotFrame = false; // more than one flush packet + if (packet) { + // got packet after flush, this is an error + return result; + } + } else if (result < 0) { + LOG(ERROR) << "avcodec_send_packet failed, err: " + << Util::generateErrorDesc(result); + return result; // error + } else { + consumed = packet ? packet->size : 0; // all bytes get consumed + } + + result = avcodec_receive_frame(codecCtx_, frame_); + + if (result >= 0) { + *gotFrame = true; // frame is available + } else if (result == AVERROR(EAGAIN)) { + *gotFrame = false; // no frames at this time, needs more packets + if (!consumed) { + // precaution, if no packages got consumed and no frames are available + return result; + } + } else if (result == AVERROR_EOF) { + *gotFrame = false; // the last frame has been flushed + // precaution, if no more frames are available assume we consume all bytes + consumed = 0; + } else { // error + LOG(ERROR) << "avcodec_receive_frame failed, err: " + << Util::generateErrorDesc(result); + return result; + } + return consumed; +} + +int Stream::decodePacket( + const AVPacket* packet, + DecoderOutputMessage* out, + bool headerOnly, + bool* hasMsg) { + int consumed; + bool gotFrame = false; + *hasMsg = false; + if ((consumed = analyzePacket(packet, &gotFrame)) >= 0 && + (packet == nullptr || gotFrame)) { + int result; + if ((result = getMessage(out, !gotFrame, headerOnly)) < 0) { + return result; // report error + } + *hasMsg = result > 0; + } + return consumed; +} + +int Stream::flush(DecoderOutputMessage* out, bool headerOnly) { + bool hasMsg = false; + int result = decodePacket(nullptr, out, headerOnly, &hasMsg); + if (result < 0) { + avcodec_flush_buffers(codecCtx_); + return result; + } + if (!hasMsg) { + avcodec_flush_buffers(codecCtx_); + return 0; + } + return 1; +} + +int Stream::getMessage(DecoderOutputMessage* out, bool flush, bool headerOnly) { + if (flush) { + // only flush of audio frames makes sense + if (format_.type == TYPE_AUDIO) { + int processed = 0; + size_t total = 0; + // grab all audio bytes by chunks + do { + if ((processed = copyFrameBytes(out->payload.get(), flush)) < 0) { + return processed; + } + total += processed; + } while (processed); + + if (total) { + // set header if message bytes are available + setHeader(&out->header, flush); + return 1; + } + } + return 0; + } else { + if (format_.type == TYPE_AUDIO) { + int processed = 0; + if ((processed = copyFrameBytes(out->payload.get(), flush)) < 0) { + return processed; + } + if (processed) { + // set header if message bytes are available + setHeader(&out->header, flush); + return 1; + } + return 0; + } else { + // set header + setHeader(&out->header, flush); + + if (headerOnly) { + // Only header is requisted + return 1; + } + + return copyFrameBytes(out->payload.get(), flush); + } + } +} + +void Stream::setHeader(DecoderHeader* header, bool flush) { + header->seqno = numGenerator_++; + + setFramePts(header, flush); + + if (convertPtsToWallTime_) { + keeper_.adjust(header->pts); + } + + header->format = format_; + header->keyFrame = 0; + header->fps = std::numeric_limits::quiet_NaN(); +} + +void Stream::setFramePts(DecoderHeader* header, bool flush) { + if (flush) { + header->pts = nextPts_; // already in us + } else { + header->pts = av_frame_get_best_effort_timestamp(frame_); + if (header->pts == AV_NOPTS_VALUE) { + header->pts = nextPts_; + } else { + header->pts = av_rescale_q( + header->pts, + inputCtx_->streams[format_.stream]->time_base, + timeBaseQ); + } + + switch (format_.type) { + case TYPE_AUDIO: + nextPts_ = header->pts + frame_->nb_samples * AV_TIME_BASE / fps_; + break; + case TYPE_VIDEO: + nextPts_ = header->pts + AV_TIME_BASE / fps_; + break; + default: + nextPts_ = header->pts; + } + } +} + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/stream.h b/torchvision/csrc/io/decoder/stream.h new file mode 100644 index 00000000000..97dfa8b5761 --- /dev/null +++ b/torchvision/csrc/io/decoder/stream.h @@ -0,0 +1,74 @@ +#pragma once + +#include +#include "defs.h" +#include "time_keeper.h" + +namespace ffmpeg { + +/** + * Class uses FFMPEG library to decode one media stream (audio or video). + */ + +class Stream { + public: + Stream( + AVFormatContext* inputCtx, + MediaFormat format, + bool convertPtsToWallTime, + int64_t loggingUuid); + virtual ~Stream(); + + // returns 0 - on success or negative error + int openCodec(std::vector* metadata); + // returns 1 - if packet got consumed, 0 - if it's not, and < 0 on error + int decodePacket( + const AVPacket* packet, + DecoderOutputMessage* out, + bool headerOnly, + bool* hasMsg); + // returns stream index + int getIndex() const { + return format_.stream; + } + // returns 1 - if message got a payload, 0 - if it's not, and < 0 on error + int flush(DecoderOutputMessage* out, bool headerOnly); + // return media format + MediaFormat getMediaFormat() const { + return format_; + } + + protected: + virtual int initFormat() = 0; + // returns number processed bytes from packet, or negative error + virtual int analyzePacket(const AVPacket* packet, bool* gotFrame); + // returns number processed bytes from packet, or negative error + virtual int copyFrameBytes(ByteStorage* out, bool flush) = 0; + // sets output format + virtual void setHeader(DecoderHeader* header, bool flush); + // set frame pts + virtual void setFramePts(DecoderHeader* header, bool flush); + // finds codec + virtual AVCodec* findCodec(AVCodecParameters* params); + + private: + // returns 1 - if message got a payload, 0 - if it's not, and < 0 on error + int getMessage(DecoderOutputMessage* out, bool flush, bool headerOnly); + + protected: + AVFormatContext* const inputCtx_; + MediaFormat format_; + const bool convertPtsToWallTime_; + int64_t loggingUuid_; + + AVCodecContext* codecCtx_{nullptr}; + AVFrame* frame_{nullptr}; + + std::atomic numGenerator_{0}; + TimeKeeper keeper_; + // estimated next frame pts for flushing the last frame + int64_t nextPts_{0}; + double fps_{30.}; +}; + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/subtitle_sampler.cpp b/torchvision/csrc/io/decoder/subtitle_sampler.cpp new file mode 100644 index 00000000000..d0df24d3e35 --- /dev/null +++ b/torchvision/csrc/io/decoder/subtitle_sampler.cpp @@ -0,0 +1,46 @@ +#include "subtitle_sampler.h" +#include +#include "util.h" + +namespace ffmpeg { + +SubtitleSampler::~SubtitleSampler() { + cleanUp(); +} + +void SubtitleSampler::shutdown() { + cleanUp(); +} + +bool SubtitleSampler::init(const SamplerParameters& params) { + cleanUp(); + // set formats + params_ = params; + return true; +} + +int SubtitleSampler::sample(AVSubtitle* sub, ByteStorage* out) { + if (!sub || !out) { + return 0; // flush + } + + out->ensure(Util::size(*sub)); + + return Util::serialize(*sub, out); +} + +int SubtitleSampler::sample(const ByteStorage* in, ByteStorage* out) { + if (in && out) { + // Get a writable copy + if (size_t len = in->length()) { + out->ensure(len); + memcpy(out->writableTail(), in->data(), len); + } + return out->length(); + } + return 0; +} + +void SubtitleSampler::cleanUp() {} + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/subtitle_sampler.h b/torchvision/csrc/io/decoder/subtitle_sampler.h new file mode 100644 index 00000000000..4aee811ed56 --- /dev/null +++ b/torchvision/csrc/io/decoder/subtitle_sampler.h @@ -0,0 +1,32 @@ +#pragma once + +#include "defs.h" + +namespace ffmpeg { + +/** + * Class transcode audio frames from one format into another + */ + +class SubtitleSampler : public MediaSampler { + public: + SubtitleSampler() = default; + ~SubtitleSampler() override; + + bool init(const SamplerParameters& params) override; + int sample(const ByteStorage* in, ByteStorage* out) override; + void shutdown() override; + + // returns number processed/scaling bytes + int sample(AVSubtitle* sub, ByteStorage* out); + + // helper serialization/deserialization methods + static void serialize(const AVSubtitle& sub, ByteStorage* out); + static bool deserialize(const ByteStorage& buf, AVSubtitle* sub); + + private: + // close resources + void cleanUp(); +}; + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/subtitle_stream.cpp b/torchvision/csrc/io/decoder/subtitle_stream.cpp new file mode 100644 index 00000000000..0d3fc9f12c1 --- /dev/null +++ b/torchvision/csrc/io/decoder/subtitle_stream.cpp @@ -0,0 +1,83 @@ +#include "subtitle_stream.h" +#include +#include +#include "util.h" + +namespace ffmpeg { +const AVRational timeBaseQ = AVRational{1, AV_TIME_BASE}; + +SubtitleStream::SubtitleStream( + AVFormatContext* inputCtx, + int index, + bool convertPtsToWallTime, + const SubtitleFormat& format) + : Stream( + inputCtx, + MediaFormat::makeMediaFormat(format, index), + convertPtsToWallTime, + 0) { + memset(&sub_, 0, sizeof(sub_)); +} + +void SubtitleStream::releaseSubtitle() { + if (sub_.release) { + avsubtitle_free(&sub_); + memset(&sub_, 0, sizeof(sub_)); + } +} + +SubtitleStream::~SubtitleStream() { + releaseSubtitle(); + sampler_.shutdown(); +} + +int SubtitleStream::initFormat() { + if (!codecCtx_->subtitle_header) { + LOG(ERROR) << "No subtitle header found"; + } else { + VLOG(1) << "Subtitle header found!"; + } + return 0; +} + +int SubtitleStream::analyzePacket(const AVPacket* packet, bool* gotFrame) { + // clean-up + releaseSubtitle(); + // check flush packet + AVPacket avPacket; + av_init_packet(&avPacket); + avPacket.data = nullptr; + avPacket.size = 0; + auto pkt = packet ? *packet : avPacket; + int gotFramePtr = 0; + int result = avcodec_decode_subtitle2(codecCtx_, &sub_, &gotFramePtr, &pkt); + + if (result < 0) { + LOG(ERROR) << "avcodec_decode_subtitle2 failed, err: " + << Util::generateErrorDesc(result); + return result; + } else if (result == 0) { + result = pkt.size; // discard the rest of the package + } + + sub_.release = gotFramePtr; + *gotFrame = gotFramePtr > 0; + + // set proper pts in us + if (gotFramePtr) { + sub_.pts = av_rescale_q( + pkt.pts, inputCtx_->streams[format_.stream]->time_base, timeBaseQ); + } + + return result; +} + +int SubtitleStream::copyFrameBytes(ByteStorage* out, bool flush) { + return sampler_.sample(flush ? nullptr : &sub_, out); +} + +void SubtitleStream::setFramePts(DecoderHeader* header, bool) { + header->pts = sub_.pts; // already in us +} + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/subtitle_stream.h b/torchvision/csrc/io/decoder/subtitle_stream.h new file mode 100644 index 00000000000..6c366e11f50 --- /dev/null +++ b/torchvision/csrc/io/decoder/subtitle_stream.h @@ -0,0 +1,38 @@ +#pragma once + +#include "stream.h" +#include "subtitle_sampler.h" + +namespace ffmpeg { + +/** + * Class uses FFMPEG library to decode one subtitle stream. + */ +struct AVSubtitleKeeper : AVSubtitle { + int64_t release{0}; +}; + +class SubtitleStream : public Stream { + public: + SubtitleStream( + AVFormatContext* inputCtx, + int index, + bool convertPtsToWallTime, + const SubtitleFormat& format); + ~SubtitleStream() override; + + protected: + void setFramePts(DecoderHeader* header, bool flush) override; + + private: + int initFormat() override; + int analyzePacket(const AVPacket* packet, bool* gotFrame) override; + int copyFrameBytes(ByteStorage* out, bool flush) override; + void releaseSubtitle(); + + private: + SubtitleSampler sampler_; + AVSubtitleKeeper sub_; +}; + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/sync_decoder.cpp b/torchvision/csrc/io/decoder/sync_decoder.cpp new file mode 100644 index 00000000000..374b40838ea --- /dev/null +++ b/torchvision/csrc/io/decoder/sync_decoder.cpp @@ -0,0 +1,97 @@ +#include "sync_decoder.h" +#include + +namespace ffmpeg { + +SyncDecoder::AVByteStorage::AVByteStorage(size_t n) { + ensure(n); +} + +SyncDecoder::AVByteStorage::~AVByteStorage() { + av_free(buffer_); +} + +void SyncDecoder::AVByteStorage::ensure(size_t n) { + if (tail() < n) { + capacity_ = offset_ + length_ + n; + buffer_ = static_cast(av_realloc(buffer_, capacity_)); + } +} + +uint8_t* SyncDecoder::AVByteStorage::writableTail() { + CHECK_LE(offset_ + length_, capacity_); + return buffer_ + offset_ + length_; +} + +void SyncDecoder::AVByteStorage::append(size_t n) { + CHECK_LE(n, tail()); + length_ += n; +} + +void SyncDecoder::AVByteStorage::trim(size_t n) { + CHECK_LE(n, length_); + offset_ += n; + length_ -= n; +} + +const uint8_t* SyncDecoder::AVByteStorage::data() const { + return buffer_ + offset_; +} + +size_t SyncDecoder::AVByteStorage::length() const { + return length_; +} + +size_t SyncDecoder::AVByteStorage::tail() const { + CHECK_LE(offset_ + length_, capacity_); + return capacity_ - offset_ - length_; +} + +void SyncDecoder::AVByteStorage::clear() { + offset_ = 0; + length_ = 0; +} + +std::unique_ptr SyncDecoder::createByteStorage(size_t n) { + return std::make_unique(n); +} + +void SyncDecoder::onInit() { + eof_ = false; + queue_.clear(); +} + +int SyncDecoder::decode(DecoderOutputMessage* out, uint64_t timeoutMs) { + if (eof_ && queue_.empty()) { + return ENODATA; + } + + if (queue_.empty()) { + int result = getFrame(timeoutMs); + // assign EOF + eof_ = result == ENODATA; + // check unrecoverable error, any error but ENODATA + if (result && result != ENODATA) { + return result; + } + + // still empty + if (queue_.empty()) { + if (eof_) { + return ENODATA; + } else { + LOG(INFO) << "Queue is empty"; + return ETIMEDOUT; + } + } + } + + *out = std::move(queue_.front()); + queue_.pop_front(); + return 0; +} + +void SyncDecoder::push(DecoderOutputMessage&& buffer) { + queue_.push_back(std::move(buffer)); +} +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/sync_decoder.h b/torchvision/csrc/io/decoder/sync_decoder.h new file mode 100644 index 00000000000..b7cf7b625ac --- /dev/null +++ b/torchvision/csrc/io/decoder/sync_decoder.h @@ -0,0 +1,48 @@ +#pragma once + +#include +#include "decoder.h" + +namespace ffmpeg { + +/** + * Class uses FFMPEG library to decode media streams. + * Media bytes can be explicitly provided through read-callback + * or fetched internally by FFMPEG library + */ +class SyncDecoder : public Decoder { + public: + // Allocation of memory must be done with a proper alignment. + class AVByteStorage : public ByteStorage { + public: + explicit AVByteStorage(size_t n); + ~AVByteStorage() override; + void ensure(size_t n) override; + uint8_t* writableTail() override; + void append(size_t n) override; + void trim(size_t n) override; + const uint8_t* data() const override; + size_t length() const override; + size_t tail() const override; + void clear() override; + + private: + size_t offset_{0}; + size_t length_{0}; + size_t capacity_{0}; + uint8_t* buffer_{nullptr}; + }; + + public: + int decode(DecoderOutputMessage* out, uint64_t timeoutMs) override; + + private: + void push(DecoderOutputMessage&& buffer) override; + void onInit() override; + std::unique_ptr createByteStorage(size_t n) override; + + private: + std::list queue_; + bool eof_{false}; +}; +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/sync_decoder_test.cpp b/torchvision/csrc/io/decoder/sync_decoder_test.cpp new file mode 100644 index 00000000000..6109b12685e --- /dev/null +++ b/torchvision/csrc/io/decoder/sync_decoder_test.cpp @@ -0,0 +1,412 @@ +#include +#include +#include +#include "memory_buffer.h" +#include "sync_decoder.h" +#include "util.h" + +using namespace ffmpeg; + +namespace { +struct VideoFileStats { + std::string name; + size_t durationPts{0}; + int num{0}; + int den{0}; + int fps{0}; +}; + +void gotAllTestFiles( + const std::string& folder, + std::vector* stats) { + DIR* d = opendir(folder.c_str()); + CHECK(d); + struct dirent* dir; + while ((dir = readdir(d))) { + if (dir->d_type != DT_DIR && 0 != strcmp(dir->d_name, "README")) { + VideoFileStats item; + item.name = folder + '/' + dir->d_name; + LOG(INFO) << "Found video file: " << item.name; + stats->push_back(std::move(item)); + } + } + closedir(d); +} + +void gotFilesStats(std::vector& stats) { + DecoderParameters params; + params.timeoutMs = 10000; + params.startOffset = 1000000; + params.seekAccuracy = 100000; + params.formats = {MediaFormat(0)}; + params.headerOnly = true; + params.preventStaleness = false; + size_t avgProvUs = 0; + const size_t rounds = 100; + for (auto& item : stats) { + LOG(INFO) << "Decoding video file in memory: " << item.name; + FILE* f = fopen(item.name.c_str(), "rb"); + CHECK(f != nullptr); + fseek(f, 0, SEEK_END); + std::vector buffer(ftell(f)); + rewind(f); + CHECK_EQ(buffer.size(), fread(buffer.data(), 1, buffer.size(), f)); + fclose(f); + + for (size_t i = 0; i < rounds; ++i) { + SyncDecoder decoder; + std::vector metadata; + const auto now = std::chrono::steady_clock::now(); + CHECK(decoder.init( + params, + MemoryBuffer::getCallback(buffer.data(), buffer.size()), + &metadata)); + const auto then = std::chrono::steady_clock::now(); + decoder.shutdown(); + avgProvUs += + std::chrono::duration_cast(then - now) + .count(); + CHECK_EQ(metadata.size(), 1); + item.num = metadata[0].num; + item.den = metadata[0].den; + item.fps = metadata[0].fps; + item.durationPts = + av_rescale_q(metadata[0].duration, AV_TIME_BASE_Q, {1, item.fps}); + } + } + LOG(INFO) << "Probing (us) " << avgProvUs / stats.size() / rounds; +} + +size_t measurePerformanceUs( + const std::vector& stats, + size_t rounds, + size_t num, + size_t stride) { + size_t avgClipDecodingUs = 0; + std::srand(time(nullptr)); + for (const auto& item : stats) { + FILE* f = fopen(item.name.c_str(), "rb"); + CHECK(f != nullptr); + fseek(f, 0, SEEK_END); + std::vector buffer(ftell(f)); + rewind(f); + CHECK_EQ(buffer.size(), fread(buffer.data(), 1, buffer.size(), f)); + fclose(f); + + for (size_t i = 0; i < rounds; ++i) { + // randomy select clip + size_t rOffset = std::rand(); + size_t fOffset = rOffset % item.durationPts; + size_t clipFrames = num + (num - 1) * stride; + if (fOffset + clipFrames > item.durationPts) { + fOffset = item.durationPts - clipFrames; + } + + DecoderParameters params; + params.timeoutMs = 10000; + params.startOffset = 1000000; + params.seekAccuracy = 100000; + params.preventStaleness = false; + + for (size_t n = 0; n < num; ++n) { + std::list msgs; + + params.startOffset = + av_rescale_q(fOffset, {1, item.fps}, AV_TIME_BASE_Q); + params.endOffset = params.startOffset + 100; + + auto now = std::chrono::steady_clock::now(); + SyncDecoder decoder; + CHECK(decoder.init( + params, + MemoryBuffer::getCallback(buffer.data(), buffer.size()), + nullptr)); + DecoderOutputMessage out; + while (0 == decoder.decode(&out, params.timeoutMs)) { + msgs.push_back(std::move(out)); + } + + decoder.shutdown(); + + const auto then = std::chrono::steady_clock::now(); + + fOffset += 1 + stride; + + avgClipDecodingUs += + std::chrono::duration_cast(then - now) + .count(); + } + } + } + + return avgClipDecodingUs / rounds / num / stats.size(); +} + +void runDecoder(SyncDecoder& decoder) { + DecoderOutputMessage out; + size_t audioFrames = 0, videoFrames = 0, totalBytes = 0; + while (0 == decoder.decode(&out, 10000)) { + if (out.header.format.type == TYPE_AUDIO) { + ++audioFrames; + } else if (out.header.format.type == TYPE_VIDEO) { + ++videoFrames; + } else if (out.header.format.type == TYPE_SUBTITLE && out.payload) { + // deserialize + LOG(INFO) << "Deserializing subtitle"; + AVSubtitle sub; + memset(&sub, 0, sizeof(sub)); + EXPECT_TRUE(Util::deserialize(*out.payload, &sub)); + LOG(INFO) << "Found subtitles" + << ", num rects: " << sub.num_rects; + for (int i = 0; i < sub.num_rects; ++i) { + std::string text = "picture"; + if (sub.rects[i]->type == SUBTITLE_TEXT) { + text = sub.rects[i]->text; + } else if (sub.rects[i]->type == SUBTITLE_ASS) { + text = sub.rects[i]->ass; + } + + LOG(INFO) << "Rect num: " << i << ", type:" << sub.rects[i]->type + << ", text: " << text; + } + + avsubtitle_free(&sub); + } + if (out.payload) { + totalBytes += out.payload->length(); + } + } + LOG(INFO) << "Decoded audio frames: " << audioFrames + << ", video frames: " << videoFrames + << ", total bytes: " << totalBytes; +} +} // namespace + +TEST(SyncDecoder, TestSyncDecoderPerformance) { + // Measure the average time of decoding per clip + // 1. list of the videos in testing directory + // 2. for each video got number of frames with timestamps + // 3. randomly select frame offset + // 4. adjust offset for number frames and strides, + // if it's out out upper boundary + // 5. repeat multiple times, measuring and accumulating decoding time + // per clip. + /* + 1) 4 x 2 + 2) 8 x 8 + 3) 16 x 8 + 4) 32 x 4 + */ + const std::string kFolder = "pytorch/vision/test/assets/videos"; + std::vector stats; + gotAllTestFiles(kFolder, &stats); + gotFilesStats(stats); + + const size_t kRounds = 10; + + auto new4x2 = measurePerformanceUs(stats, kRounds, 4, 2); + auto new8x8 = measurePerformanceUs(stats, kRounds, 8, 8); + auto new16x8 = measurePerformanceUs(stats, kRounds, 16, 8); + auto new32x4 = measurePerformanceUs(stats, kRounds, 32, 4); + LOG(INFO) << "Clip decoding (us)" + << ", new(4x2): " << new4x2 << ", new(8x8): " << new8x8 + << ", new(16x8): " << new16x8 << ", new(32x4): " << new32x4; +} + +TEST(SyncDecoder, Test) { + SyncDecoder decoder; + DecoderParameters params; + params.timeoutMs = 10000; + params.startOffset = 1000000; + params.seekAccuracy = 100000; + params.formats = {MediaFormat(), MediaFormat(0), MediaFormat('0')}; + params.uri = "pytorch/vision/test/assets/videos/R6llTwEh07w.mp4"; + CHECK(decoder.init(params, nullptr, nullptr)); + runDecoder(decoder); + decoder.shutdown(); +} + +TEST(SyncDecoder, TestSubtitles) { + SyncDecoder decoder; + DecoderParameters params; + params.timeoutMs = 10000; + params.formats = {MediaFormat(), MediaFormat(0), MediaFormat('0')}; + params.uri = "vue/synergy/data/robotsub.mp4"; + CHECK(decoder.init(params, nullptr, nullptr)); + runDecoder(decoder); + decoder.shutdown(); +} + +TEST(SyncDecoder, TestHeadersOnly) { + SyncDecoder decoder; + DecoderParameters params; + params.timeoutMs = 10000; + params.startOffset = 1000000; + params.seekAccuracy = 100000; + params.headerOnly = true; + params.formats = {MediaFormat(), MediaFormat(0), MediaFormat('0')}; + + params.uri = "pytorch/vision/test/assets/videos/R6llTwEh07w.mp4"; + CHECK(decoder.init(params, nullptr, nullptr)); + runDecoder(decoder); + decoder.shutdown(); + + params.uri = "pytorch/vision/test/assets/videos/SOX5yA1l24A.mp4"; + CHECK(decoder.init(params, nullptr, nullptr)); + runDecoder(decoder); + decoder.shutdown(); + + params.uri = "pytorch/vision/test/assets/videos/WUzgd7C1pWA.mp4"; + CHECK(decoder.init(params, nullptr, nullptr)); + runDecoder(decoder); + decoder.shutdown(); +} + +TEST(SyncDecoder, TestHeadersOnlyDownSampling) { + SyncDecoder decoder; + DecoderParameters params; + params.timeoutMs = 10000; + params.startOffset = 1000000; + params.seekAccuracy = 100000; + params.headerOnly = true; + MediaFormat format; + format.type = TYPE_AUDIO; + format.format.audio.samples = 8000; + params.formats.insert(format); + + format.type = TYPE_VIDEO; + format.format.video.width = 224; + format.format.video.height = 224; + params.formats.insert(format); + + params.uri = "pytorch/vision/test/assets/videos/R6llTwEh07w.mp4"; + CHECK(decoder.init(params, nullptr, nullptr)); + runDecoder(decoder); + decoder.shutdown(); + + params.uri = "pytorch/vision/test/assets/videos/SOX5yA1l24A.mp4"; + CHECK(decoder.init(params, nullptr, nullptr)); + runDecoder(decoder); + decoder.shutdown(); + + params.uri = "pytorch/vision/test/assets/videos/WUzgd7C1pWA.mp4"; + CHECK(decoder.init(params, nullptr, nullptr)); + runDecoder(decoder); + decoder.shutdown(); +} + +TEST(SyncDecoder, TestInitOnlyNoShutdown) { + SyncDecoder decoder; + DecoderParameters params; + params.timeoutMs = 10000; + params.startOffset = 1000000; + params.seekAccuracy = 100000; + params.headerOnly = false; + params.formats = {MediaFormat(), MediaFormat(0), MediaFormat('0')}; + params.uri = "pytorch/vision/test/assets/videos/R6llTwEh07w.mp4"; + std::vector metadata; + CHECK(decoder.init(params, nullptr, &metadata)); +} + +TEST(SyncDecoder, TestMemoryBuffer) { + SyncDecoder decoder; + DecoderParameters params; + params.timeoutMs = 10000; + params.startOffset = 1000000; + params.endOffset = 9000000; + params.seekAccuracy = 10000; + params.formats = {MediaFormat(), MediaFormat(0), MediaFormat('0')}; + + FILE* f = fopen( + "pytorch/vision/test/assets/videos/RATRACE_wave_f_nm_np1_fr_goo_37.avi", + "rb"); + CHECK(f != nullptr); + fseek(f, 0, SEEK_END); + std::vector buffer(ftell(f)); + rewind(f); + CHECK_EQ(buffer.size(), fread(buffer.data(), 1, buffer.size(), f)); + fclose(f); + CHECK(decoder.init( + params, + MemoryBuffer::getCallback(buffer.data(), buffer.size()), + nullptr)); + LOG(INFO) << "Decoding from memory bytes: " << buffer.size(); + runDecoder(decoder); + decoder.shutdown(); +} + +TEST(SyncDecoder, TestMemoryBufferNoSeekableWithFullRead) { + SyncDecoder decoder; + DecoderParameters params; + params.timeoutMs = 10000; + params.startOffset = 1000000; + params.endOffset = 9000000; + params.seekAccuracy = 10000; + params.formats = {MediaFormat(), MediaFormat(0), MediaFormat('0')}; + + FILE* f = fopen("pytorch/vision/test/assets/videos/R6llTwEh07w.mp4", "rb"); + CHECK(f != nullptr); + fseek(f, 0, SEEK_END); + std::vector buffer(ftell(f)); + rewind(f); + CHECK_EQ(buffer.size(), fread(buffer.data(), 1, buffer.size(), f)); + fclose(f); + + params.maxSeekableBytes = buffer.size() + 1; + MemoryBuffer object(buffer.data(), buffer.size()); + CHECK(decoder.init( + params, + [object](uint8_t* out, int size, int whence, uint64_t timeoutMs) mutable + -> int { + if (out) { // see defs.h file + // read mode + return object.read(out, size); + } + // seek mode + if (!timeoutMs) { + // seek capabilty, yes - no + return -1; + } + return object.seek(size, whence); + }, + nullptr)); + runDecoder(decoder); + decoder.shutdown(); +} + +TEST(SyncDecoder, TestMemoryBufferNoSeekableWithPartialRead) { + SyncDecoder decoder; + DecoderParameters params; + params.timeoutMs = 10000; + params.startOffset = 1000000; + params.endOffset = 9000000; + params.seekAccuracy = 10000; + params.formats = {MediaFormat(), MediaFormat(0), MediaFormat('0')}; + + FILE* f = fopen("pytorch/vision/test/assets/videos/R6llTwEh07w.mp4", "rb"); + CHECK(f != nullptr); + fseek(f, 0, SEEK_END); + std::vector buffer(ftell(f)); + rewind(f); + CHECK_EQ(buffer.size(), fread(buffer.data(), 1, buffer.size(), f)); + fclose(f); + + params.maxSeekableBytes = buffer.size() / 2; + MemoryBuffer object(buffer.data(), buffer.size()); + CHECK(!decoder.init( + params, + [object](uint8_t* out, int size, int whence, uint64_t timeoutMs) mutable + -> int { + if (out) { // see defs.h file + // read mode + return object.read(out, size); + } + // seek mode + if (!timeoutMs) { + // seek capabilty, yes - no + return -1; + } + return object.seek(size, whence); + }, + nullptr)); +} diff --git a/torchvision/csrc/io/decoder/time_keeper.cpp b/torchvision/csrc/io/decoder/time_keeper.cpp new file mode 100644 index 00000000000..845c76cddc8 --- /dev/null +++ b/torchvision/csrc/io/decoder/time_keeper.cpp @@ -0,0 +1,35 @@ +#include "time_keeper.h" +#include "defs.h" + +namespace ffmpeg { + +namespace { +const long kMaxTimeBaseDiference = 10; +} + +long TimeKeeper::adjust(long& decoderTimestamp) { + const long now = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + + if (startTime_ == 0) { + startTime_ = now; + } + if (streamTimestamp_ == 0) { + streamTimestamp_ = decoderTimestamp; + } + + const auto runOut = startTime_ + decoderTimestamp - streamTimestamp_; + + if (std::labs((now - runOut) / AV_TIME_BASE) > kMaxTimeBaseDiference) { + streamTimestamp_ = startTime_ - now + decoderTimestamp; + } + + const auto sleepAdvised = runOut - now; + + decoderTimestamp += startTime_ - streamTimestamp_; + + return sleepAdvised > 0 ? sleepAdvised : 0; +} + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/time_keeper.h b/torchvision/csrc/io/decoder/time_keeper.h new file mode 100644 index 00000000000..e4d4718c705 --- /dev/null +++ b/torchvision/csrc/io/decoder/time_keeper.h @@ -0,0 +1,25 @@ +#pragma once + +#include +#include + +namespace ffmpeg { + +/** + * Class keeps the track of the decoded timestamps (us) for media streams. + */ + +class TimeKeeper { + public: + TimeKeeper() = default; + + // adjust provided @timestamp to the corrected value + // return advised sleep time before next frame processing in (us) + long adjust(long& decoderTimestamp); + + private: + long startTime_{0}; + long streamTimestamp_{0}; +}; + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/util.cpp b/torchvision/csrc/io/decoder/util.cpp new file mode 100644 index 00000000000..774612d3927 --- /dev/null +++ b/torchvision/csrc/io/decoder/util.cpp @@ -0,0 +1,402 @@ +#include "util.h" +#include + +namespace ffmpeg { + +namespace Serializer { + +// fixed size types +template +inline size_t getSize(const T& x) { + return sizeof(x); +} + +template +inline bool serializeItem( + uint8_t* dest, + size_t len, + size_t& pos, + const T& src) { + VLOG(6) << "Generic serializeItem"; + const auto required = sizeof(src); + if (len < pos + required) { + return false; + } + memcpy(dest + pos, &src, required); + pos += required; + return true; +} + +template +inline bool deserializeItem( + const uint8_t* src, + size_t len, + size_t& pos, + T& dest) { + const auto required = sizeof(dest); + if (len < pos + required) { + return false; + } + memcpy(&dest, src + pos, required); + pos += required; + return true; +} + +// AVSubtitleRect specialization +inline size_t getSize(const AVSubtitleRect& x) { + auto rectBytes = [](const AVSubtitleRect& y) -> size_t { + size_t s = 0; + switch (y.type) { + case SUBTITLE_BITMAP: + for (int i = 0; i < y.nb_colors; ++i) { + s += sizeof(y.pict.linesize[i]); + s += y.pict.linesize[i]; + } + break; + case SUBTITLE_TEXT: + s += sizeof(size_t); + s += strlen(y.text); + break; + case SUBTITLE_ASS: + s += sizeof(size_t); + s += strlen(y.ass); + break; + default: + break; + } + return s; + }; + return getSize(x.x) + getSize(x.y) + getSize(x.w) + getSize(x.h) + + getSize(x.nb_colors) + getSize(x.type) + getSize(x.flags) + rectBytes(x); +} + +// AVSubtitle specialization +inline size_t getSize(const AVSubtitle& x) { + auto rectBytes = [](const AVSubtitle& y) -> size_t { + size_t s = getSize(y.num_rects); + for (unsigned i = 0; i < y.num_rects; ++i) { + s += getSize(*y.rects[i]); + } + return s; + }; + return getSize(x.format) + getSize(x.start_display_time) + + getSize(x.end_display_time) + getSize(x.pts) + rectBytes(x); +} + +inline bool serializeItem( + uint8_t* dest, + size_t len, + size_t& pos, + const AVSubtitleRect& src) { + auto rectSerialize = + [](uint8_t* d, size_t l, size_t& p, const AVSubtitleRect& x) -> size_t { + switch (x.type) { + case SUBTITLE_BITMAP: + for (int i = 0; i < x.nb_colors; ++i) { + if (!serializeItem(d, l, p, x.pict.linesize[i])) { + return false; + } + if (p + x.pict.linesize[i] > l) { + return false; + } + memcpy(d + p, x.pict.data[i], x.pict.linesize[i]); + p += x.pict.linesize[i]; + } + return true; + case SUBTITLE_TEXT: { + const size_t s = strlen(x.text); + if (!serializeItem(d, l, p, s)) { + return false; + } + if (p + s > l) { + return false; + } + memcpy(d + p, x.text, s); + p += s; + return true; + } + case SUBTITLE_ASS: { + const size_t s = strlen(x.ass); + if (!serializeItem(d, l, p, s)) { + return false; + } + if (p + s > l) { + return false; + } + memcpy(d + p, x.ass, s); + p += s; + return true; + } + default: + return true; + } + }; + return serializeItem(dest, len, pos, src.x) && + serializeItem(dest, len, pos, src.y) && + serializeItem(dest, len, pos, src.w) && + serializeItem(dest, len, pos, src.h) && + serializeItem(dest, len, pos, src.nb_colors) && + serializeItem(dest, len, pos, src.type) && + serializeItem(dest, len, pos, src.flags) && + rectSerialize(dest, len, pos, src); +} + +inline bool serializeItem( + uint8_t* dest, + size_t len, + size_t& pos, + const AVSubtitle& src) { + auto rectSerialize = + [](uint8_t* d, size_t l, size_t& p, const AVSubtitle& x) -> bool { + bool res = serializeItem(d, l, p, x.num_rects); + for (unsigned i = 0; res && i < x.num_rects; ++i) { + res = serializeItem(d, l, p, *(x.rects[i])); + } + return res; + }; + VLOG(6) << "AVSubtitle serializeItem"; + return serializeItem(dest, len, pos, src.format) && + serializeItem(dest, len, pos, src.start_display_time) && + serializeItem(dest, len, pos, src.end_display_time) && + serializeItem(dest, len, pos, src.pts) && + rectSerialize(dest, len, pos, src); +} + +inline bool deserializeItem( + const uint8_t* src, + size_t len, + size_t& pos, + AVSubtitleRect& dest) { + auto rectDeserialize = + [](const uint8_t* y, size_t l, size_t& p, AVSubtitleRect& x) -> bool { + switch (x.type) { + case SUBTITLE_BITMAP: + for (int i = 0; i < x.nb_colors; ++i) { + if (!deserializeItem(y, l, p, x.pict.linesize[i])) { + return false; + } + if (p + x.pict.linesize[i] > l) { + return false; + } + x.pict.data[i] = (uint8_t*)av_malloc(x.pict.linesize[i]); + memcpy(x.pict.data[i], y + p, x.pict.linesize[i]); + p += x.pict.linesize[i]; + } + return true; + case SUBTITLE_TEXT: { + size_t s = 0; + if (!deserializeItem(y, l, p, s)) { + return false; + } + if (p + s > l) { + return false; + } + x.text = (char*)av_malloc(s + 1); + memcpy(x.text, y + p, s); + x.text[s] = 0; + p += s; + return true; + } + case SUBTITLE_ASS: { + size_t s = 0; + if (!deserializeItem(y, l, p, s)) { + return false; + } + if (p + s > l) { + return false; + } + x.ass = (char*)av_malloc(s + 1); + memcpy(x.ass, y + p, s); + x.ass[s] = 0; + p += s; + return true; + } + default: + return true; + } + }; + + return deserializeItem(src, len, pos, dest.x) && + deserializeItem(src, len, pos, dest.y) && + deserializeItem(src, len, pos, dest.w) && + deserializeItem(src, len, pos, dest.h) && + deserializeItem(src, len, pos, dest.nb_colors) && + deserializeItem(src, len, pos, dest.type) && + deserializeItem(src, len, pos, dest.flags) && + rectDeserialize(src, len, pos, dest); +} + +inline bool deserializeItem( + const uint8_t* src, + size_t len, + size_t& pos, + AVSubtitle& dest) { + auto rectDeserialize = + [](const uint8_t* y, size_t l, size_t& p, AVSubtitle& x) -> bool { + bool res = deserializeItem(y, l, p, x.num_rects); + if (res && x.num_rects) { + x.rects = + (AVSubtitleRect**)av_malloc(x.num_rects * sizeof(AVSubtitleRect*)); + } + for (unsigned i = 0; res && i < x.num_rects; ++i) { + x.rects[i] = (AVSubtitleRect*)av_malloc(sizeof(AVSubtitleRect)); + memset(x.rects[i], 0, sizeof(AVSubtitleRect)); + res = deserializeItem(y, l, p, *x.rects[i]); + } + return res; + }; + return deserializeItem(src, len, pos, dest.format) && + deserializeItem(src, len, pos, dest.start_display_time) && + deserializeItem(src, len, pos, dest.end_display_time) && + deserializeItem(src, len, pos, dest.pts) && + rectDeserialize(src, len, pos, dest); +} +} // namespace Serializer + +namespace Util { +std::string generateErrorDesc(int errorCode) { + std::array buffer; + if (av_strerror(errorCode, buffer.data(), buffer.size()) < 0) { + return std::string("Unknown error code: ") + std::to_string(errorCode); + } + buffer.back() = 0; + return std::string(buffer.data()); +} + +size_t serialize(const AVSubtitle& sub, ByteStorage* out) { + const auto len = size(sub); + CHECK_LE(len, out->tail()); + size_t pos = 0; + if (!Serializer::serializeItem(out->writableTail(), len, pos, sub)) { + return 0; + } + out->append(len); + return len; +} + +bool deserialize(const ByteStorage& buf, AVSubtitle* sub) { + size_t pos = 0; + return Serializer::deserializeItem(buf.data(), buf.length(), pos, *sub); +} + +size_t size(const AVSubtitle& sub) { + return Serializer::getSize(sub); +} + +bool validateVideoFormat(const VideoFormat& f) { + // clang-format off + /* + Valid parameters values for decoder + ____________________________________________________________________________________ + | W | H | minDimension | maxDimension | cropImage | algorithm | + |__________________________________________________________________________________| + | 0 | 0 | 0 | 0 | N/A | original | + |__________________________________________________________________________________| + | >0 | 0 | N/A | N/A | N/A | scale keeping W | + |__________________________________________________________________________________| + | 0 | >0 | N/A | N/A | N/A | scale keeping H | + |__________________________________________________________________________________| + | >0 | >0 | N/A | N/A | 0 | stretch/scale | + |__________________________________________________________________________________| + | >0 | >0 | N/A | N/A | >0 | scale/crop | + |__________________________________________________________________________________| + | 0 | 0 | >0 | 0 | N/A |scale to min dimension | + |__________________________________________________________________________________| + | 0 | 0 | 0 | >0 | N/A |scale to max dimension | + |__________________________________________________________________________________| + | 0 | 0 | >0 | >0 | N/A |stretch to min/max dimension| + |_____|_____|______________|______________|___________|____________________________| + + */ + // clang-format on + return (f.width == 0 && // #1, #6, #7 and #8 + f.height == 0 && f.cropImage == 0) || + (f.width != 0 && // #4 and #5 + f.height != 0 && f.minDimension == 0 && f.maxDimension == 0) || + (((f.width != 0 && // #2 + f.height == 0) || + (f.width == 0 && // #3 + f.height != 0)) && + f.minDimension == 0 && f.maxDimension == 0 && f.cropImage == 0); +} + +void setFormatDimensions( + size_t& destW, + size_t& destH, + size_t userW, + size_t userH, + size_t srcW, + size_t srcH, + size_t minDimension, + size_t maxDimension, + size_t cropImage) { + // rounding rules + // int -> double -> round up + // if fraction is >= 0.5 or round down if fraction is < 0.5 + // int result = double(value) + 0.5 + // here we rounding double to int according to the above rule + + // #1, #6, #7 and #8 + if (userW == 0 && userH == 0) { + if (minDimension > 0 && maxDimension == 0) { // #6 + if (srcW > srcH) { + // landscape + destH = minDimension; + destW = round(double(srcW * minDimension) / srcH); + } else { + // portrait + destW = minDimension; + destH = round(double(srcH * minDimension) / srcW); + } + } else if (minDimension == 0 && maxDimension > 0) { // #7 + if (srcW > srcH) { + // landscape + destW = maxDimension; + destH = round(double(srcH * maxDimension) / srcW); + } else { + // portrait + destH = maxDimension; + destW = round(double(srcW * maxDimension) / srcH); + } + } else if (minDimension > 0 && maxDimension > 0) { // #8 + if (srcW > srcH) { + // landscape + destW = maxDimension; + destH = minDimension; + } else { + // portrait + destW = minDimension; + destH = maxDimension; + } + } else { // #1 + destW = srcW; + destH = srcH; + } + } else if (userW != 0 && userH == 0) { // #2 + destW = userW; + destH = round(double(srcH * userW) / srcW); + } else if (userW == 0 && userH != 0) { // #3 + destW = round(double(srcW * userH) / srcH); + destH = userH; + } else { // userW != 0 && userH != 0 + if (cropImage == 0) { // #4 + destW = userW; + destH = userH; + } else { // #5 + double userSlope = double(userH) / userW; + double srcSlope = double(srcH) / srcW; + if (srcSlope < userSlope) { + destW = round(double(srcW * userH) / srcH); + destH = userH; + } else { + destW = userW; + destH = round(double(srcH * userW) / srcW); + } + } + } + // prevent zeros + destW = std::max(destW, size_t(1UL)); + destH = std::max(destH, size_t(1UL)); +} +} // namespace Util +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/util.h b/torchvision/csrc/io/decoder/util.h new file mode 100644 index 00000000000..01b550e5bbc --- /dev/null +++ b/torchvision/csrc/io/decoder/util.h @@ -0,0 +1,28 @@ +#pragma once + +#include "defs.h" + +namespace ffmpeg { + +/** + * FFMPEG library utility functions. + */ + +namespace Util { +std::string generateErrorDesc(int errorCode); +size_t serialize(const AVSubtitle& sub, ByteStorage* out); +bool deserialize(const ByteStorage& buf, AVSubtitle* sub); +size_t size(const AVSubtitle& sub); +void setFormatDimensions( + size_t& destW, + size_t& destH, + size_t userW, + size_t userH, + size_t srcW, + size_t srcH, + size_t minDimension, + size_t maxDimension, + size_t cropImage); +bool validateVideoFormat(const VideoFormat& format); +} // namespace Util +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/util_test.cpp b/torchvision/csrc/io/decoder/util_test.cpp new file mode 100644 index 00000000000..78de08b7139 --- /dev/null +++ b/torchvision/csrc/io/decoder/util_test.cpp @@ -0,0 +1,35 @@ +#include +#include +#include +#include "util.h" + +TEST(Util, TestSetFormatDimensions) { + // clang-format off + const size_t test_cases[][9] = { + // (userW, userH, srcW, srcH, minDimension, maxDimension, cropImage, destW, destH) + {0, 0, 172, 128, 0, 0, 0, 172, 128}, // #1 + {86, 0, 172, 128, 0, 0, 0, 86, 64}, // #2 + {64, 0, 128, 172, 0, 0, 0, 64, 86}, // #2 + {0, 32, 172, 128, 0, 0, 0, 43, 32}, // #3 + {32, 0, 128, 172, 0, 0, 0, 32, 43}, // #3 + {60, 50, 172, 128, 0, 0, 0, 60, 50}, // #4 + {50, 60, 128, 172, 0, 0, 0, 50, 60}, // #4 + {86, 40, 172, 128, 0, 0, 1, 86, 64}, // #5 + {86, 92, 172, 128, 0, 0, 1, 124, 92}, // #5 + {0, 0, 172, 128, 256, 0, 0, 344, 256}, // #6 + {0, 0, 128, 172, 256, 0, 0, 256, 344}, // #6 + {0, 0, 128, 172, 0, 344, 0, 256, 344}, // #7 + {0, 0, 172, 128, 0, 344, 0, 344, 256}, // #7 + {0, 0, 172, 128, 100, 344, 0, 344, 100},// #8 + {0, 0, 128, 172, 100, 344, 0, 100, 344} // #8 + }; + // clang-format onn + + for (const auto& tc : test_cases) { + size_t destW = 0; + size_t destH = 0; + ffmpeg::Util::setFormatDimensions(destW, destH, tc[0], tc[1], tc[2], tc[3], tc[4], tc[5], tc[6]); + CHECK(destW == tc[7]); + CHECK(destH == tc[8]); + } +} diff --git a/torchvision/csrc/io/decoder/video_sampler.cpp b/torchvision/csrc/io/decoder/video_sampler.cpp new file mode 100644 index 00000000000..5b9726b7c6c --- /dev/null +++ b/torchvision/csrc/io/decoder/video_sampler.cpp @@ -0,0 +1,261 @@ +#include "video_sampler.h" +#include +#include "util.h" + +// www.ffmpeg.org/doxygen/0.5/swscale-example_8c-source.html + +namespace ffmpeg { + +namespace { +int preparePlanes( + const VideoFormat& fmt, + const uint8_t* buffer, + uint8_t** planes, + int* lineSize) { + int result; + + if ((result = av_image_fill_arrays( + planes, + lineSize, + buffer, + (AVPixelFormat)fmt.format, + fmt.width, + fmt.height, + 1)) < 0) { + LOG(ERROR) << "av_image_fill_arrays failed, err: " + << Util::generateErrorDesc(result); + } + return result; +} + +int transformImage( + SwsContext* context, + const uint8_t* const srcSlice[], + int srcStride[], + VideoFormat inFormat, + VideoFormat outFormat, + uint8_t* out, + uint8_t* planes[], + int lines[]) { + int result; + if ((result = preparePlanes(outFormat, out, planes, lines)) < 0) { + return result; + } + + if ((result = sws_scale( + context, srcSlice, srcStride, 0, inFormat.height, planes, lines)) < + 0) { + LOG(ERROR) << "sws_scale failed, err: " << Util::generateErrorDesc(result); + return result; + } + return 0; +} +} // namespace + +VideoSampler::VideoSampler(int swsFlags, int64_t loggingUuid) + : swsFlags_(swsFlags), loggingUuid_(loggingUuid) {} + +VideoSampler::~VideoSampler() { + cleanUp(); +} + +void VideoSampler::shutdown() { + cleanUp(); +} + +bool VideoSampler::init(const SamplerParameters& params) { + cleanUp(); + + if (params.out.video.cropImage != 0) { + if (!Util::validateVideoFormat(params.out.video)) { + LOG(ERROR) << "Invalid video format" + << ", width: " << params.out.video.width + << ", height: " << params.out.video.height + << ", format: " << params.out.video.format + << ", minDimension: " << params.out.video.minDimension + << ", crop: " << params.out.video.cropImage; + + return false; + } + + scaleFormat_.format = params.out.video.format; + Util::setFormatDimensions( + scaleFormat_.width, + scaleFormat_.height, + params.out.video.width, + params.out.video.height, + params.in.video.width, + params.in.video.height, + 0, + 0, + 1); + + if (!(scaleFormat_ == params_.out.video)) { // crop required + cropContext_ = sws_getContext( + params.out.video.width, + params.out.video.height, + (AVPixelFormat)params.out.video.format, + params.out.video.width, + params.out.video.height, + (AVPixelFormat)params.out.video.format, + swsFlags_, + nullptr, + nullptr, + nullptr); + + if (!cropContext_) { + LOG(ERROR) << "sws_getContext failed for crop context"; + return false; + } + + const auto scaleImageSize = av_image_get_buffer_size( + (AVPixelFormat)scaleFormat_.format, + scaleFormat_.width, + scaleFormat_.height, + 1); + scaleBuffer_.resize(scaleImageSize); + } + } else { + scaleFormat_ = params.out.video; + } + + VLOG(1) << "Input format #" << loggingUuid_ << ", width " + << params.in.video.width << ", height " << params.in.video.height + << ", format " << params.in.video.format << ", minDimension " + << params.in.video.minDimension << ", cropImage " + << params.in.video.cropImage; + VLOG(1) << "Scale format #" << loggingUuid_ << ", width " + << scaleFormat_.width << ", height " << scaleFormat_.height + << ", format " << scaleFormat_.format << ", minDimension " + << scaleFormat_.minDimension << ", cropImage " + << scaleFormat_.cropImage; + VLOG(1) << "Crop format #" << loggingUuid_ << ", width " + << params.out.video.width << ", height " << params.out.video.height + << ", format " << params.out.video.format << ", minDimension " + << params.out.video.minDimension << ", cropImage " + << params.out.video.cropImage; + + scaleContext_ = sws_getContext( + params.in.video.width, + params.in.video.height, + (AVPixelFormat)params.in.video.format, + scaleFormat_.width, + scaleFormat_.height, + (AVPixelFormat)scaleFormat_.format, + swsFlags_, + nullptr, + nullptr, + nullptr); + + // set output format + params_ = params; + + return scaleContext_ != nullptr; +} + +int VideoSampler::sample( + const uint8_t* const srcSlice[], + int srcStride[], + ByteStorage* out) { + int result; + // scaled and cropped image + int outImageSize = av_image_get_buffer_size( + (AVPixelFormat)params_.out.video.format, + params_.out.video.width, + params_.out.video.height, + 1); + + out->ensure(outImageSize); + + uint8_t* scalePlanes[4] = {nullptr}; + int scaleLines[4] = {0}; + // perform scale first + if ((result = transformImage( + scaleContext_, + srcSlice, + srcStride, + params_.in.video, + scaleFormat_, + // for crop use internal buffer + cropContext_ ? scaleBuffer_.data() : out->writableTail(), + scalePlanes, + scaleLines))) { + return result; + } + + // is crop required? + if (cropContext_) { + uint8_t* cropPlanes[4] = {nullptr}; + int cropLines[4] = {0}; + + if (params_.out.video.height < scaleFormat_.height) { + // Destination image is wider of source image: cut top and bottom + for (size_t i = 0; i < 4 && scalePlanes[i] != nullptr; ++i) { + scalePlanes[i] += scaleLines[i] * + (scaleFormat_.height - params_.out.video.height) / 2; + } + } else { + // Source image is wider of destination image: cut sides + for (size_t i = 0; i < 4 && scalePlanes[i] != nullptr; ++i) { + scalePlanes[i] += scaleLines[i] * + (scaleFormat_.width - params_.out.video.width) / 2 / + scaleFormat_.width; + } + } + + // crop image + if ((result = transformImage( + cropContext_, + scalePlanes, + scaleLines, + params_.out.video, + params_.out.video, + out->writableTail(), + cropPlanes, + cropLines))) { + return result; + } + } + + out->append(outImageSize); + return outImageSize; +} + +int VideoSampler::sample(AVFrame* frame, ByteStorage* out) { + if (!frame) { + return 0; // no flush for videos + } + + return sample(frame->data, frame->linesize, out); +} + +int VideoSampler::sample(const ByteStorage* in, ByteStorage* out) { + if (!in) { + return 0; // no flush for videos + } + + int result; + uint8_t* inPlanes[4] = {nullptr}; + int inLineSize[4] = {0}; + + if ((result = preparePlanes( + params_.in.video, in->data(), inPlanes, inLineSize)) < 0) { + return result; + } + + return sample(inPlanes, inLineSize, out); +} + +void VideoSampler::cleanUp() { + if (scaleContext_) { + sws_freeContext(scaleContext_); + scaleContext_ = nullptr; + } + if (cropContext_) { + sws_freeContext(cropContext_); + cropContext_ = nullptr; + scaleBuffer_.clear(); + } +} + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/video_sampler.h b/torchvision/csrc/io/decoder/video_sampler.h new file mode 100644 index 00000000000..47247f2c0c5 --- /dev/null +++ b/torchvision/csrc/io/decoder/video_sampler.h @@ -0,0 +1,44 @@ +#pragma once + +#include "defs.h" + +namespace ffmpeg { + +/** + * Class transcode video frames from one format into another + */ + +class VideoSampler : public MediaSampler { + public: + VideoSampler(int swsFlags = SWS_AREA, int64_t loggingUuid = 0); + + ~VideoSampler() override; + + // MediaSampler overrides + bool init(const SamplerParameters& params) override; + int sample(const ByteStorage* in, ByteStorage* out) override; + void shutdown() override; + + // returns number processed/scaling bytes + int sample(AVFrame* frame, ByteStorage* out); + int getImageBytes() const; + + private: + // close resources + void cleanUp(); + // helper functions for rescaling, cropping, etc. + int sample( + const uint8_t* const srcSlice[], + int srcStride[], + ByteStorage* out); + + private: + VideoFormat scaleFormat_; + SwsContext* scaleContext_{nullptr}; + SwsContext* cropContext_{nullptr}; + int swsFlags_{SWS_AREA}; + std::vector scaleBuffer_; + int64_t loggingUuid_{0}; +}; + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/video_stream.cpp b/torchvision/csrc/io/decoder/video_stream.cpp new file mode 100644 index 00000000000..a9e20434fe0 --- /dev/null +++ b/torchvision/csrc/io/decoder/video_stream.cpp @@ -0,0 +1,126 @@ +#include "video_stream.h" +#include +#include "util.h" + +namespace ffmpeg { + +namespace { +bool operator==(const VideoFormat& x, const AVFrame& y) { + return x.width == y.width && x.height == y.height && x.format == y.format; +} + +bool operator==(const VideoFormat& x, const AVCodecContext& y) { + return x.width == y.width && x.height == y.height && x.format == y.pix_fmt; +} + +VideoFormat& toVideoFormat(VideoFormat& x, const AVFrame& y) { + x.width = y.width; + x.height = y.height; + x.format = y.format; + return x; +} + +VideoFormat& toVideoFormat(VideoFormat& x, const AVCodecContext& y) { + x.width = y.width; + x.height = y.height; + x.format = y.pix_fmt; + return x; +} +} // namespace + +VideoStream::VideoStream( + AVFormatContext* inputCtx, + int index, + bool convertPtsToWallTime, + const VideoFormat& format, + int64_t loggingUuid) + : Stream( + inputCtx, + MediaFormat::makeMediaFormat(format, index), + convertPtsToWallTime, + loggingUuid) {} + +VideoStream::~VideoStream() { + if (sampler_) { + sampler_->shutdown(); + sampler_.reset(); + } +} + +int VideoStream::initFormat() { + // set output format + if (!Util::validateVideoFormat(format_.format.video)) { + LOG(ERROR) << "Invalid video format" + << ", width: " << format_.format.video.width + << ", height: " << format_.format.video.height + << ", format: " << format_.format.video.format + << ", minDimension: " << format_.format.video.minDimension + << ", crop: " << format_.format.video.cropImage; + return -1; + } + + // keep aspect ratio + Util::setFormatDimensions( + format_.format.video.width, + format_.format.video.height, + format_.format.video.width, + format_.format.video.height, + codecCtx_->width, + codecCtx_->height, + format_.format.video.minDimension, + format_.format.video.maxDimension, + 0); + + if (format_.format.video.format == AV_PIX_FMT_NONE) { + format_.format.video.format = codecCtx_->pix_fmt; + } + return format_.format.video.width != 0 && format_.format.video.height != 0 && + format_.format.video.format != AV_PIX_FMT_NONE + ? 0 + : -1; +} + +int VideoStream::copyFrameBytes(ByteStorage* out, bool flush) { + if (!sampler_) { + sampler_ = std::make_unique(SWS_AREA, loggingUuid_); + } + + // check if input format gets changed + if (flush ? !(sampler_->getInputFormat().video == *codecCtx_) + : !(sampler_->getInputFormat().video == *frame_)) { + // - reinit sampler + SamplerParameters params; + params.type = format_.type; + params.out = format_.format; + params.in = FormatUnion(0); + flush ? toVideoFormat(params.in.video, *codecCtx_) + : toVideoFormat(params.in.video, *frame_); + if (!sampler_->init(params)) { + return -1; + } + + VLOG(1) << "Set input video sampler format" + << ", width: " << params.in.video.width + << ", height: " << params.in.video.height + << ", format: " << params.in.video.format + << " : output video sampler format" + << ", width: " << format_.format.video.width + << ", height: " << format_.format.video.height + << ", format: " << format_.format.video.format + << ", minDimension: " << format_.format.video.minDimension + << ", crop: " << format_.format.video.cropImage; + } + + return sampler_->sample(flush ? nullptr : frame_, out); +} + +void VideoStream::setHeader(DecoderHeader* header, bool flush) { + Stream::setHeader(header, flush); + if (!flush) { // no frames for video flush + header->keyFrame = frame_->key_frame; + header->fps = av_q2d(av_guess_frame_rate( + inputCtx_, inputCtx_->streams[format_.stream], nullptr)); + } +} + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/decoder/video_stream.h b/torchvision/csrc/io/decoder/video_stream.h new file mode 100644 index 00000000000..e6a8bf02b65 --- /dev/null +++ b/torchvision/csrc/io/decoder/video_stream.h @@ -0,0 +1,31 @@ +#pragma once + +#include "stream.h" +#include "video_sampler.h" + +namespace ffmpeg { + +/** + * Class uses FFMPEG library to decode one video stream. + */ + +class VideoStream : public Stream { + public: + VideoStream( + AVFormatContext* inputCtx, + int index, + bool convertPtsToWallTime, + const VideoFormat& format, + int64_t loggingUuid); + ~VideoStream() override; + + private: + int initFormat() override; + int copyFrameBytes(ByteStorage* out, bool flush) override; + void setHeader(DecoderHeader* header, bool flush) override; + + private: + std::unique_ptr sampler_; +}; + +} // namespace ffmpeg diff --git a/torchvision/csrc/io/image/cpu/common_jpeg.cpp b/torchvision/csrc/io/image/cpu/common_jpeg.cpp new file mode 100644 index 00000000000..4c993106b45 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/common_jpeg.cpp @@ -0,0 +1,26 @@ +#include "common_jpeg.h" + +namespace vision { +namespace image { +namespace detail { + +#if JPEG_FOUND +void torch_jpeg_error_exit(j_common_ptr cinfo) { + /* cinfo->err really points to a torch_jpeg_error_mgr struct, so coerce + * pointer */ + torch_jpeg_error_ptr myerr = (torch_jpeg_error_ptr)cinfo->err; + + /* Always display the message. */ + /* We could postpone this until after returning, if we chose. */ + // (*cinfo->err->output_message)(cinfo); + /* Create the message */ + (*(cinfo->err->format_message))(cinfo, myerr->jpegLastErrorMsg); + + /* Return control to the setjmp point */ + longjmp(myerr->setjmp_buffer, 1); +} +#endif + +} // namespace detail +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/common_jpeg.h b/torchvision/csrc/io/image/cpu/common_jpeg.h new file mode 100644 index 00000000000..7f7f9f0ccf1 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/common_jpeg.h @@ -0,0 +1,27 @@ +#pragma once + +#if JPEG_FOUND +#include + +#include +#include + +namespace vision { +namespace image { +namespace detail { + +static const JOCTET EOI_BUFFER[1] = {JPEG_EOI}; +struct torch_jpeg_error_mgr { + struct jpeg_error_mgr pub; /* "public" fields */ + char jpegLastErrorMsg[JMSG_LENGTH_MAX]; /* error messages */ + jmp_buf setjmp_buffer; /* for return to caller */ +}; + +using torch_jpeg_error_ptr = struct torch_jpeg_error_mgr*; +void torch_jpeg_error_exit(j_common_ptr cinfo); + +} // namespace detail +} // namespace image +} // namespace vision + +#endif diff --git a/torchvision/csrc/io/image/cpu/common_png.h b/torchvision/csrc/io/image/cpu/common_png.h new file mode 100644 index 00000000000..68400d48e05 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/common_png.h @@ -0,0 +1,6 @@ +#pragma once + +#if PNG_FOUND +#include +#include +#endif diff --git a/torchvision/csrc/io/image/cpu/decode_image.cpp b/torchvision/csrc/io/image/cpu/decode_image.cpp new file mode 100644 index 00000000000..1cc05dc76ca --- /dev/null +++ b/torchvision/csrc/io/image/cpu/decode_image.cpp @@ -0,0 +1,35 @@ +#include "decode_image.h" + +#include "decode_jpeg.h" +#include "decode_png.h" + +namespace vision { +namespace image { + +torch::Tensor decode_image(const torch::Tensor& data, ImageReadMode mode) { + // Check that the input tensor dtype is uint8 + TORCH_CHECK(data.dtype() == torch::kU8, "Expected a torch.uint8 tensor"); + // Check that the input tensor is 1-dimensional + TORCH_CHECK( + data.dim() == 1 && data.numel() > 0, + "Expected a non empty 1-dimensional tensor"); + + auto datap = data.data_ptr(); + + const uint8_t jpeg_signature[3] = {255, 216, 255}; // == "\xFF\xD8\xFF" + const uint8_t png_signature[4] = {137, 80, 78, 71}; // == "\211PNG" + + if (memcmp(jpeg_signature, datap, 3) == 0) { + return decode_jpeg(data, mode); + } else if (memcmp(png_signature, datap, 4) == 0) { + return decode_png(data, mode); + } else { + TORCH_CHECK( + false, + "Unsupported image file. Only jpeg and png ", + "are currently supported."); + } +} + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/decode_image.h b/torchvision/csrc/io/image/cpu/decode_image.h new file mode 100644 index 00000000000..853d6d91afa --- /dev/null +++ b/torchvision/csrc/io/image/cpu/decode_image.h @@ -0,0 +1,14 @@ +#pragma once + +#include +#include "../image_read_mode.h" + +namespace vision { +namespace image { + +C10_EXPORT torch::Tensor decode_image( + const torch::Tensor& data, + ImageReadMode mode = IMAGE_READ_MODE_UNCHANGED); + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/decode_jpeg.cpp b/torchvision/csrc/io/image/cpu/decode_jpeg.cpp new file mode 100644 index 00000000000..c6e971c3b12 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/decode_jpeg.cpp @@ -0,0 +1,157 @@ +#include "decode_jpeg.h" +#include "common_jpeg.h" + +namespace vision { +namespace image { + +#if !JPEG_FOUND +torch::Tensor decode_jpeg(const torch::Tensor& data, ImageReadMode mode) { + TORCH_CHECK( + false, "decode_jpeg: torchvision not compiled with libjpeg support"); +} +#else + +using namespace detail; + +namespace { + +struct torch_jpeg_mgr { + struct jpeg_source_mgr pub; + const JOCTET* data; + size_t len; +}; + +static void torch_jpeg_init_source(j_decompress_ptr cinfo) {} + +static boolean torch_jpeg_fill_input_buffer(j_decompress_ptr cinfo) { + // No more data. Probably an incomplete image; Raise exception. + torch_jpeg_error_ptr myerr = (torch_jpeg_error_ptr)cinfo->err; + strcpy(myerr->jpegLastErrorMsg, "Image is incomplete or truncated"); + longjmp(myerr->setjmp_buffer, 1); +} + +static void torch_jpeg_skip_input_data(j_decompress_ptr cinfo, long num_bytes) { + torch_jpeg_mgr* src = (torch_jpeg_mgr*)cinfo->src; + if (src->pub.bytes_in_buffer < (size_t)num_bytes) { + // Skipping over all of remaining data; output EOI. + src->pub.next_input_byte = EOI_BUFFER; + src->pub.bytes_in_buffer = 1; + } else { + // Skipping over only some of the remaining data. + src->pub.next_input_byte += num_bytes; + src->pub.bytes_in_buffer -= num_bytes; + } +} + +static void torch_jpeg_term_source(j_decompress_ptr cinfo) {} + +static void torch_jpeg_set_source_mgr( + j_decompress_ptr cinfo, + const unsigned char* data, + size_t len) { + torch_jpeg_mgr* src; + if (cinfo->src == 0) { // if this is first time; allocate memory + cinfo->src = (struct jpeg_source_mgr*)(*cinfo->mem->alloc_small)( + (j_common_ptr)cinfo, JPOOL_PERMANENT, sizeof(torch_jpeg_mgr)); + } + src = (torch_jpeg_mgr*)cinfo->src; + src->pub.init_source = torch_jpeg_init_source; + src->pub.fill_input_buffer = torch_jpeg_fill_input_buffer; + src->pub.skip_input_data = torch_jpeg_skip_input_data; + src->pub.resync_to_restart = jpeg_resync_to_restart; // default + src->pub.term_source = torch_jpeg_term_source; + // fill the buffers + src->data = (const JOCTET*)data; + src->len = len; + src->pub.bytes_in_buffer = len; + src->pub.next_input_byte = src->data; +} + +} // namespace + +torch::Tensor decode_jpeg(const torch::Tensor& data, ImageReadMode mode) { + // Check that the input tensor dtype is uint8 + TORCH_CHECK(data.dtype() == torch::kU8, "Expected a torch.uint8 tensor"); + // Check that the input tensor is 1-dimensional + TORCH_CHECK( + data.dim() == 1 && data.numel() > 0, + "Expected a non empty 1-dimensional tensor"); + + struct jpeg_decompress_struct cinfo; + struct torch_jpeg_error_mgr jerr; + + auto datap = data.data_ptr(); + // Setup decompression structure + cinfo.err = jpeg_std_error(&jerr.pub); + jerr.pub.error_exit = torch_jpeg_error_exit; + /* Establish the setjmp return context for my_error_exit to use. */ + if (setjmp(jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. + * We need to clean up the JPEG object. + */ + jpeg_destroy_decompress(&cinfo); + TORCH_CHECK(false, jerr.jpegLastErrorMsg); + } + + jpeg_create_decompress(&cinfo); + torch_jpeg_set_source_mgr(&cinfo, datap, data.numel()); + + // read info from header. + jpeg_read_header(&cinfo, TRUE); + + int channels = cinfo.num_components; + + if (mode != IMAGE_READ_MODE_UNCHANGED) { + switch (mode) { + case IMAGE_READ_MODE_GRAY: + if (cinfo.jpeg_color_space != JCS_GRAYSCALE) { + cinfo.out_color_space = JCS_GRAYSCALE; + channels = 1; + } + break; + case IMAGE_READ_MODE_RGB: + if (cinfo.jpeg_color_space != JCS_RGB) { + cinfo.out_color_space = JCS_RGB; + channels = 3; + } + break; + /* + * Libjpeg does not support converting from CMYK to grayscale etc. There + * is a way to do this but it involves converting it manually to RGB: + * https://github.com/tensorflow/tensorflow/blob/86871065265b04e0db8ca360c046421efb2bdeb4/tensorflow/core/lib/jpeg/jpeg_mem.cc#L284-L313 + */ + default: + jpeg_destroy_decompress(&cinfo); + TORCH_CHECK(false, "The provided mode is not supported for JPEG files"); + } + + jpeg_calc_output_dimensions(&cinfo); + } + + jpeg_start_decompress(&cinfo); + + int height = cinfo.output_height; + int width = cinfo.output_width; + + int stride = width * channels; + auto tensor = + torch::empty({int64_t(height), int64_t(width), channels}, torch::kU8); + auto ptr = tensor.data_ptr(); + while (cinfo.output_scanline < cinfo.output_height) { + /* jpeg_read_scanlines expects an array of pointers to scanlines. + * Here the array is only one element long, but you could ask for + * more than one scanline at a time if that's more convenient. + */ + jpeg_read_scanlines(&cinfo, &ptr, 1); + ptr += stride; + } + + jpeg_finish_decompress(&cinfo); + jpeg_destroy_decompress(&cinfo); + return tensor.permute({2, 0, 1}); +} + +#endif + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/decode_jpeg.h b/torchvision/csrc/io/image/cpu/decode_jpeg.h new file mode 100644 index 00000000000..97ed3d51a54 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/decode_jpeg.h @@ -0,0 +1,14 @@ +#pragma once + +#include +#include "../image_read_mode.h" + +namespace vision { +namespace image { + +C10_EXPORT torch::Tensor decode_jpeg( + const torch::Tensor& data, + ImageReadMode mode = IMAGE_READ_MODE_UNCHANGED); + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/decode_png.cpp b/torchvision/csrc/io/image/cpu/decode_png.cpp new file mode 100644 index 00000000000..5ee33635a1c --- /dev/null +++ b/torchvision/csrc/io/image/cpu/decode_png.cpp @@ -0,0 +1,169 @@ +#include "decode_png.h" +#include "common_png.h" + +namespace vision { +namespace image { + +#if !PNG_FOUND +torch::Tensor decode_png(const torch::Tensor& data, ImageReadMode mode) { + TORCH_CHECK( + false, "decode_png: torchvision not compiled with libPNG support"); +} +#else + +torch::Tensor decode_png(const torch::Tensor& data, ImageReadMode mode) { + // Check that the input tensor dtype is uint8 + TORCH_CHECK(data.dtype() == torch::kU8, "Expected a torch.uint8 tensor"); + // Check that the input tensor is 1-dimensional + TORCH_CHECK( + data.dim() == 1 && data.numel() > 0, + "Expected a non empty 1-dimensional tensor"); + + auto png_ptr = + png_create_read_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr); + TORCH_CHECK(png_ptr, "libpng read structure allocation failed!") + auto info_ptr = png_create_info_struct(png_ptr); + if (!info_ptr) { + png_destroy_read_struct(&png_ptr, nullptr, nullptr); + // Seems redundant with the if statement. done here to avoid leaking memory. + TORCH_CHECK(info_ptr, "libpng info structure allocation failed!") + } + + auto datap = data.accessor().data(); + + if (setjmp(png_jmpbuf(png_ptr)) != 0) { + png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); + TORCH_CHECK(false, "Internal error."); + } + auto is_png = !png_sig_cmp(datap, 0, 8); + TORCH_CHECK(is_png, "Content is not png!") + + struct Reader { + png_const_bytep ptr; + } reader; + reader.ptr = png_const_bytep(datap) + 8; + + auto read_callback = + [](png_structp png_ptr, png_bytep output, png_size_t bytes) { + auto reader = static_cast(png_get_io_ptr(png_ptr)); + std::copy(reader->ptr, reader->ptr + bytes, output); + reader->ptr += bytes; + }; + png_set_sig_bytes(png_ptr, 8); + png_set_read_fn(png_ptr, &reader, read_callback); + png_read_info(png_ptr, info_ptr); + + png_uint_32 width, height; + int bit_depth, color_type; + auto retval = png_get_IHDR( + png_ptr, + info_ptr, + &width, + &height, + &bit_depth, + &color_type, + nullptr, + nullptr, + nullptr); + + if (retval != 1) { + png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); + TORCH_CHECK(retval == 1, "Could read image metadata from content.") + } + + int channels = png_get_channels(png_ptr, info_ptr); + + if (mode != IMAGE_READ_MODE_UNCHANGED) { + // TODO: consider supporting PNG_INFO_tRNS + bool is_palette = (color_type & PNG_COLOR_MASK_PALETTE) != 0; + bool has_color = (color_type & PNG_COLOR_MASK_COLOR) != 0; + bool has_alpha = (color_type & PNG_COLOR_MASK_ALPHA) != 0; + + switch (mode) { + case IMAGE_READ_MODE_GRAY: + if (color_type != PNG_COLOR_TYPE_GRAY) { + if (is_palette) { + png_set_palette_to_rgb(png_ptr); + has_alpha = true; + } + + if (has_alpha) { + png_set_strip_alpha(png_ptr); + } + + if (has_color) { + png_set_rgb_to_gray(png_ptr, 1, 0.2989, 0.587); + } + channels = 1; + } + break; + case IMAGE_READ_MODE_GRAY_ALPHA: + if (color_type != PNG_COLOR_TYPE_GRAY_ALPHA) { + if (is_palette) { + png_set_palette_to_rgb(png_ptr); + has_alpha = true; + } + + if (!has_alpha) { + png_set_add_alpha(png_ptr, (1 << bit_depth) - 1, PNG_FILLER_AFTER); + } + + if (has_color) { + png_set_rgb_to_gray(png_ptr, 1, 0.2989, 0.587); + } + channels = 2; + } + break; + case IMAGE_READ_MODE_RGB: + if (color_type != PNG_COLOR_TYPE_RGB) { + if (is_palette) { + png_set_palette_to_rgb(png_ptr); + has_alpha = true; + } else if (!has_color) { + png_set_gray_to_rgb(png_ptr); + } + + if (has_alpha) { + png_set_strip_alpha(png_ptr); + } + channels = 3; + } + break; + case IMAGE_READ_MODE_RGB_ALPHA: + if (color_type != PNG_COLOR_TYPE_RGB_ALPHA) { + if (is_palette) { + png_set_palette_to_rgb(png_ptr); + has_alpha = true; + } else if (!has_color) { + png_set_gray_to_rgb(png_ptr); + } + + if (!has_alpha) { + png_set_add_alpha(png_ptr, (1 << bit_depth) - 1, PNG_FILLER_AFTER); + } + channels = 4; + } + break; + default: + png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); + TORCH_CHECK(false, "The provided mode is not supported for PNG files"); + } + + png_read_update_info(png_ptr, info_ptr); + } + + auto tensor = + torch::empty({int64_t(height), int64_t(width), channels}, torch::kU8); + auto ptr = tensor.accessor().data(); + auto bytes = png_get_rowbytes(png_ptr, info_ptr); + for (png_uint_32 i = 0; i < height; ++i) { + png_read_row(png_ptr, ptr, nullptr); + ptr += bytes; + } + png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); + return tensor.permute({2, 0, 1}); +} +#endif + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/decode_png.h b/torchvision/csrc/io/image/cpu/decode_png.h new file mode 100644 index 00000000000..471bf77d935 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/decode_png.h @@ -0,0 +1,14 @@ +#pragma once + +#include +#include "../image_read_mode.h" + +namespace vision { +namespace image { + +C10_EXPORT torch::Tensor decode_png( + const torch::Tensor& data, + ImageReadMode mode = IMAGE_READ_MODE_UNCHANGED); + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/encode_jpeg.cpp b/torchvision/csrc/io/image/cpu/encode_jpeg.cpp new file mode 100644 index 00000000000..c84ad37005d --- /dev/null +++ b/torchvision/csrc/io/image/cpu/encode_jpeg.cpp @@ -0,0 +1,109 @@ +#include "encode_jpeg.h" + +#include "common_jpeg.h" + +namespace vision { +namespace image { + +#if !JPEG_FOUND + +torch::Tensor encode_jpeg(const torch::Tensor& data, int64_t quality) { + TORCH_CHECK( + false, "encode_jpeg: torchvision not compiled with libjpeg support"); +} + +#else + +using namespace detail; + +torch::Tensor encode_jpeg(const torch::Tensor& data, int64_t quality) { + // Define compression structures and error handling + struct jpeg_compress_struct cinfo; + struct torch_jpeg_error_mgr jerr; + + // Define buffer to write JPEG information to and its size + unsigned long jpegSize = 0; + uint8_t* jpegBuf = NULL; + + cinfo.err = jpeg_std_error(&jerr.pub); + jerr.pub.error_exit = torch_jpeg_error_exit; + + /* Establish the setjmp return context for my_error_exit to use. */ + if (setjmp(jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. + * We need to clean up the JPEG object and the buffer. + */ + jpeg_destroy_compress(&cinfo); + if (jpegBuf != NULL) { + free(jpegBuf); + } + + TORCH_CHECK(false, (const char*)jerr.jpegLastErrorMsg); + } + + // Check that the input tensor is on CPU + TORCH_CHECK(data.device() == torch::kCPU, "Input tensor should be on CPU"); + + // Check that the input tensor dtype is uint8 + TORCH_CHECK(data.dtype() == torch::kU8, "Input tensor dtype should be uint8"); + + // Check that the input tensor is 3-dimensional + TORCH_CHECK(data.dim() == 3, "Input data should be a 3-dimensional tensor"); + + // Get image info + int channels = data.size(0); + int height = data.size(1); + int width = data.size(2); + auto input = data.permute({1, 2, 0}).contiguous(); + + TORCH_CHECK( + channels == 1 || channels == 3, + "The number of channels should be 1 or 3, got: ", + channels); + + // Initialize JPEG structure + jpeg_create_compress(&cinfo); + + // Set output image information + cinfo.image_width = width; + cinfo.image_height = height; + cinfo.input_components = channels; + cinfo.in_color_space = channels == 1 ? JCS_GRAYSCALE : JCS_RGB; + + jpeg_set_defaults(&cinfo); + jpeg_set_quality(&cinfo, quality, TRUE); + + // Save JPEG output to a buffer + jpeg_mem_dest(&cinfo, &jpegBuf, &jpegSize); + + // Start JPEG compression + jpeg_start_compress(&cinfo, TRUE); + + auto stride = width * channels; + auto ptr = input.data_ptr(); + + // Encode JPEG file + while (cinfo.next_scanline < cinfo.image_height) { + jpeg_write_scanlines(&cinfo, &ptr, 1); + ptr += stride; + } + + jpeg_finish_compress(&cinfo); + jpeg_destroy_compress(&cinfo); + + torch::TensorOptions options = torch::TensorOptions{torch::kU8}; + auto outTensor = torch::empty({(long)jpegSize}, options); + + // Copy memory from jpeg buffer, since torch cannot get ownership of it via + // `from_blob` + auto outPtr = outTensor.data_ptr(); + std::memcpy(outPtr, jpegBuf, sizeof(uint8_t) * outTensor.numel()); + + free(jpegBuf); + + return outTensor; +} +#endif + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/encode_jpeg.h b/torchvision/csrc/io/image/cpu/encode_jpeg.h new file mode 100644 index 00000000000..25084e154d6 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/encode_jpeg.h @@ -0,0 +1,13 @@ +#pragma once + +#include + +namespace vision { +namespace image { + +C10_EXPORT torch::Tensor encode_jpeg( + const torch::Tensor& data, + int64_t quality); + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/encode_png.cpp b/torchvision/csrc/io/image/cpu/encode_png.cpp new file mode 100644 index 00000000000..d28bad95890 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/encode_png.cpp @@ -0,0 +1,179 @@ +#include "encode_jpeg.h" + +#include "common_png.h" + +namespace vision { +namespace image { + +#if !PNG_FOUND + +torch::Tensor encode_png(const torch::Tensor& data, int64_t compression_level) { + TORCH_CHECK( + false, "encode_png: torchvision not compiled with libpng support"); +} + +#else + +namespace { + +struct torch_mem_encode { + char* buffer; + size_t size; +}; + +struct torch_png_error_mgr { + const char* pngLastErrorMsg; /* error messages */ + jmp_buf setjmp_buffer; /* for return to caller */ +}; + +using torch_png_error_mgr_ptr = torch_png_error_mgr*; + +void torch_png_error(png_structp png_ptr, png_const_charp error_msg) { + /* png_ptr->err really points to a torch_png_error_mgr struct, so coerce + * pointer */ + auto error_ptr = (torch_png_error_mgr_ptr)png_get_error_ptr(png_ptr); + /* Replace the error message on the error structure */ + error_ptr->pngLastErrorMsg = error_msg; + /* Return control to the setjmp point */ + longjmp(error_ptr->setjmp_buffer, 1); +} + +void torch_png_write_data( + png_structp png_ptr, + png_bytep data, + png_size_t length) { + struct torch_mem_encode* p = + (struct torch_mem_encode*)png_get_io_ptr(png_ptr); + size_t nsize = p->size + length; + + /* allocate or grow buffer */ + if (p->buffer) + p->buffer = (char*)realloc(p->buffer, nsize); + else + p->buffer = (char*)malloc(nsize); + + if (!p->buffer) + png_error(png_ptr, "Write Error"); + + /* copy new bytes to end of buffer */ + memcpy(p->buffer + p->size, data, length); + p->size += length; +} + +} // namespace + +torch::Tensor encode_png(const torch::Tensor& data, int64_t compression_level) { + // Define compression structures and error handling + png_structp png_write; + png_infop info_ptr; + struct torch_png_error_mgr err_ptr; + + // Define output buffer + struct torch_mem_encode buf_info; + buf_info.buffer = NULL; + buf_info.size = 0; + + /* Establish the setjmp return context for my_error_exit to use. */ + if (setjmp(err_ptr.setjmp_buffer)) { + /* If we get here, the PNG code has signaled an error. + * We need to clean up the PNG object and the buffer. + */ + if (info_ptr != NULL) { + png_destroy_info_struct(png_write, &info_ptr); + } + + if (png_write != NULL) { + png_destroy_write_struct(&png_write, NULL); + } + + if (buf_info.buffer != NULL) { + free(buf_info.buffer); + } + + TORCH_CHECK(false, err_ptr.pngLastErrorMsg); + } + + // Check that the compression level is between 0 and 9 + TORCH_CHECK( + compression_level >= 0 && compression_level <= 9, + "Compression level should be between 0 and 9"); + + // Check that the input tensor is on CPU + TORCH_CHECK(data.device() == torch::kCPU, "Input tensor should be on CPU"); + + // Check that the input tensor dtype is uint8 + TORCH_CHECK(data.dtype() == torch::kU8, "Input tensor dtype should be uint8"); + + // Check that the input tensor is 3-dimensional + TORCH_CHECK(data.dim() == 3, "Input data should be a 3-dimensional tensor"); + + // Get image info + int channels = data.size(0); + int height = data.size(1); + int width = data.size(2); + auto input = data.permute({1, 2, 0}).contiguous(); + + TORCH_CHECK( + channels == 1 || channels == 3, + "The number of channels should be 1 or 3, got: ", + channels); + + // Initialize PNG structures + png_write = png_create_write_struct( + PNG_LIBPNG_VER_STRING, &err_ptr, torch_png_error, NULL); + + info_ptr = png_create_info_struct(png_write); + + // Define custom buffer output + png_set_write_fn(png_write, &buf_info, torch_png_write_data, NULL); + + // Set output image information + auto color_type = channels == 1 ? PNG_COLOR_TYPE_GRAY : PNG_COLOR_TYPE_RGB; + png_set_IHDR( + png_write, + info_ptr, + width, + height, + 8, + color_type, + PNG_INTERLACE_NONE, + PNG_COMPRESSION_TYPE_DEFAULT, + PNG_FILTER_TYPE_DEFAULT); + + // Set image compression level + png_set_compression_level(png_write, compression_level); + + // Write file header + png_write_info(png_write, info_ptr); + + auto stride = width * channels; + auto ptr = input.data_ptr(); + + // Encode PNG file + for (int y = 0; y < height; ++y) { + png_write_row(png_write, ptr); + ptr += stride; + } + + // Write EOF + png_write_end(png_write, info_ptr); + + // Destroy structures + png_destroy_write_struct(&png_write, &info_ptr); + + torch::TensorOptions options = torch::TensorOptions{torch::kU8}; + auto outTensor = torch::empty({(long)buf_info.size}, options); + + // Copy memory from png buffer, since torch cannot get ownership of it via + // `from_blob` + auto outPtr = outTensor.data_ptr(); + std::memcpy(outPtr, buf_info.buffer, sizeof(uint8_t) * outTensor.numel()); + free(buf_info.buffer); + + return outTensor; +} + +#endif + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/encode_png.h b/torchvision/csrc/io/image/cpu/encode_png.h new file mode 100644 index 00000000000..86a67c8706e --- /dev/null +++ b/torchvision/csrc/io/image/cpu/encode_png.h @@ -0,0 +1,13 @@ +#pragma once + +#include + +namespace vision { +namespace image { + +C10_EXPORT torch::Tensor encode_png( + const torch::Tensor& data, + int64_t compression_level); + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/read_write_file.cpp b/torchvision/csrc/io/image/cpu/read_write_file.cpp new file mode 100644 index 00000000000..a0bb7df72d5 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/read_write_file.cpp @@ -0,0 +1,104 @@ +#include "read_write_file.h" + +#include + +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#endif + +namespace vision { +namespace image { + +#ifdef _WIN32 +namespace { +std::wstring utf8_decode(const std::string& str) { + if (str.empty()) { + return std::wstring(); + } + int size_needed = MultiByteToWideChar( + CP_UTF8, 0, str.c_str(), static_cast(str.size()), NULL, 0); + TORCH_CHECK(size_needed > 0, "Error converting the content to Unicode"); + std::wstring wstrTo(size_needed, 0); + MultiByteToWideChar( + CP_UTF8, + 0, + str.c_str(), + static_cast(str.size()), + &wstrTo[0], + size_needed); + return wstrTo; +} +} // namespace +#endif + +torch::Tensor read_file(const std::string& filename) { +#ifdef _WIN32 + // According to + // https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/stat-functions?view=vs-2019, + // we should use struct __stat64 and _wstat64 for 64-bit file size on Windows. + struct __stat64 stat_buf; + auto fileW = utf8_decode(filename); + int rc = _wstat64(fileW.c_str(), &stat_buf); +#else + struct stat stat_buf; + int rc = stat(filename.c_str(), &stat_buf); +#endif + // errno is a variable defined in errno.h + TORCH_CHECK( + rc == 0, "[Errno ", errno, "] ", strerror(errno), ": '", filename, "'"); + + int64_t size = stat_buf.st_size; + + TORCH_CHECK(size > 0, "Expected a non empty file"); + +#ifdef _WIN32 + // TODO: Once torch::from_file handles UTF-8 paths correctly, we should move + // back to use the following implementation since it uses file mapping. + // auto data = + // torch::from_file(filename, /*shared=*/false, /*size=*/size, + // torch::kU8).clone() + FILE* infile = _wfopen(fileW.c_str(), L"rb"); + + TORCH_CHECK(infile != nullptr, "Error opening input file"); + + auto data = torch::empty({size}, torch::kU8); + auto dataBytes = data.data_ptr(); + + fread(dataBytes, sizeof(uint8_t), size, infile); + fclose(infile); +#else + auto data = + torch::from_file(filename, /*shared=*/false, /*size=*/size, torch::kU8); +#endif + + return data; +} + +void write_file(const std::string& filename, torch::Tensor& data) { + // Check that the input tensor is on CPU + TORCH_CHECK(data.device() == torch::kCPU, "Input tensor should be on CPU"); + + // Check that the input tensor dtype is uint8 + TORCH_CHECK(data.dtype() == torch::kU8, "Input tensor dtype should be uint8"); + + // Check that the input tensor is 3-dimensional + TORCH_CHECK(data.dim() == 1, "Input data should be a 1-dimensional tensor"); + + auto fileBytes = data.data_ptr(); + auto fileCStr = filename.c_str(); +#ifdef _WIN32 + auto fileW = utf8_decode(filename); + FILE* outfile = _wfopen(fileW.c_str(), L"wb"); +#else + FILE* outfile = fopen(fileCStr, "wb"); +#endif + + TORCH_CHECK(outfile != nullptr, "Error opening output file"); + + fwrite(fileBytes, sizeof(uint8_t), data.numel(), outfile); + fclose(outfile); +} + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/cpu/read_write_file.h b/torchvision/csrc/io/image/cpu/read_write_file.h new file mode 100644 index 00000000000..a5a712dd8e2 --- /dev/null +++ b/torchvision/csrc/io/image/cpu/read_write_file.h @@ -0,0 +1,13 @@ +#pragma once + +#include + +namespace vision { +namespace image { + +C10_EXPORT torch::Tensor read_file(const std::string& filename); + +C10_EXPORT void write_file(const std::string& filename, torch::Tensor& data); + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/image.cpp b/torchvision/csrc/io/image/image.cpp new file mode 100644 index 00000000000..51cf9c7ce3e --- /dev/null +++ b/torchvision/csrc/io/image/image.cpp @@ -0,0 +1,27 @@ +#include "image.h" + +#include + +// If we are in a Windows environment, we need to define +// initialization functions for the _custom_ops extension +#ifdef _WIN32 +PyMODINIT_FUNC PyInit_image(void) { + // No need to do anything. + return NULL; +} +#endif + +namespace vision { +namespace image { + +static auto registry = torch::RegisterOperators() + .op("image::decode_png", &decode_png) + .op("image::encode_png", &encode_png) + .op("image::decode_jpeg", &decode_jpeg) + .op("image::encode_jpeg", &encode_jpeg) + .op("image::read_file", &read_file) + .op("image::write_file", &write_file) + .op("image::decode_image", &decode_image); + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/image/image.h b/torchvision/csrc/io/image/image.h new file mode 100644 index 00000000000..fb09d6d71b8 --- /dev/null +++ b/torchvision/csrc/io/image/image.h @@ -0,0 +1,8 @@ +#pragma once + +#include "cpu/decode_image.h" +#include "cpu/decode_jpeg.h" +#include "cpu/decode_png.h" +#include "cpu/encode_jpeg.h" +#include "cpu/encode_png.h" +#include "cpu/read_write_file.h" diff --git a/torchvision/csrc/io/image/image_read_mode.h b/torchvision/csrc/io/image/image_read_mode.h new file mode 100644 index 00000000000..84425265c34 --- /dev/null +++ b/torchvision/csrc/io/image/image_read_mode.h @@ -0,0 +1,17 @@ +#pragma once + +#include + +namespace vision { +namespace image { + +/* Should be kept in-sync with Python ImageReadMode enum */ +using ImageReadMode = int64_t; +const ImageReadMode IMAGE_READ_MODE_UNCHANGED = 0; +const ImageReadMode IMAGE_READ_MODE_GRAY = 1; +const ImageReadMode IMAGE_READ_MODE_GRAY_ALPHA = 2; +const ImageReadMode IMAGE_READ_MODE_RGB = 3; +const ImageReadMode IMAGE_READ_MODE_RGB_ALPHA = 4; + +} // namespace image +} // namespace vision diff --git a/torchvision/csrc/io/video/video.cpp b/torchvision/csrc/io/video/video.cpp new file mode 100644 index 00000000000..d7d28a51770 --- /dev/null +++ b/torchvision/csrc/io/video/video.cpp @@ -0,0 +1,342 @@ +#include "video.h" + +#include + +namespace vision { +namespace video { + +namespace { + +const size_t decoderTimeoutMs = 600000; +const AVPixelFormat defaultVideoPixelFormat = AV_PIX_FMT_RGB24; + +// returns number of written bytes +template +size_t fillTensorList(DecoderOutputMessage& msgs, torch::Tensor& frame) { + const auto& msg = msgs; + T* frameData = frame.numel() > 0 ? frame.data_ptr() : nullptr; + if (frameData) { + auto sizeInBytes = msg.payload->length(); + memcpy(frameData, msg.payload->data(), sizeInBytes); + } + return sizeof(T); +} + +size_t fillVideoTensor(DecoderOutputMessage& msgs, torch::Tensor& videoFrame) { + return fillTensorList(msgs, videoFrame); +} + +size_t fillAudioTensor(DecoderOutputMessage& msgs, torch::Tensor& audioFrame) { + return fillTensorList(msgs, audioFrame); +} + +std::array, 4>::const_iterator +_parse_type(const std::string& stream_string) { + static const std::array, 4> types = {{ + {"video", TYPE_VIDEO}, + {"audio", TYPE_AUDIO}, + {"subtitle", TYPE_SUBTITLE}, + {"cc", TYPE_CC}, + }}; + auto device = std::find_if( + types.begin(), + types.end(), + [stream_string](const std::pair& p) { + return p.first == stream_string; + }); + if (device != types.end()) { + return device; + } + TORCH_CHECK( + false, "Expected one of [audio, video, subtitle, cc] ", stream_string); +} + +std::string parse_type_to_string(const std::string& stream_string) { + auto device = _parse_type(stream_string); + return device->first; +} + +MediaType parse_type_to_mt(const std::string& stream_string) { + auto device = _parse_type(stream_string); + return device->second; +} + +std::tuple _parseStream(const std::string& streamString) { + TORCH_CHECK(!streamString.empty(), "Stream string must not be empty"); + static const std::regex regex("([a-zA-Z_]+)(?::([1-9]\\d*|0))?"); + std::smatch match; + + TORCH_CHECK( + std::regex_match(streamString, match, regex), + "Invalid stream string: '", + streamString, + "'"); + + std::string type_ = "video"; + type_ = parse_type_to_string(match[1].str()); + long index_ = -1; + if (match[2].matched) { + try { + index_ = c10::stoi(match[2].str()); + } catch (const std::exception&) { + TORCH_CHECK( + false, + "Could not parse device index '", + match[2].str(), + "' in device string '", + streamString, + "'"); + } + } + return std::make_tuple(type_, index_); +} + +} // namespace + +void Video::_getDecoderParams( + double videoStartS, + int64_t getPtsOnly, + std::string stream, + long stream_id = -1, + bool all_streams = false, + double seekFrameMarginUs = 10) { + int64_t videoStartUs = int64_t(videoStartS * 1e6); + + params.timeoutMs = decoderTimeoutMs; + params.startOffset = videoStartUs; + params.seekAccuracy = seekFrameMarginUs; + params.headerOnly = false; + + params.preventStaleness = false; // not sure what this is about + + if (all_streams == true) { + MediaFormat format; + format.stream = -2; + format.type = TYPE_AUDIO; + params.formats.insert(format); + + format.type = TYPE_VIDEO; + format.stream = -2; + format.format.video.width = 0; + format.format.video.height = 0; + format.format.video.cropImage = 0; + format.format.video.format = defaultVideoPixelFormat; + params.formats.insert(format); + + format.type = TYPE_SUBTITLE; + format.stream = -2; + params.formats.insert(format); + + format.type = TYPE_CC; + format.stream = -2; + params.formats.insert(format); + } else { + // parse stream type + MediaType stream_type = parse_type_to_mt(stream); + + // TODO: reset params.formats + std::set formats; + params.formats = formats; + // Define new format + MediaFormat format; + format.type = stream_type; + format.stream = stream_id; + if (stream_type == TYPE_VIDEO) { + format.format.video.width = 0; + format.format.video.height = 0; + format.format.video.cropImage = 0; + format.format.video.format = defaultVideoPixelFormat; + } + params.formats.insert(format); + } + +} // _get decoder params + +Video::Video(std::string videoPath, std::string stream) { + // parse stream information + current_stream = _parseStream(stream); + // note that in the initial call we want to get all streams + Video::_getDecoderParams( + 0, // video start + 0, // headerOnly + std::get<0>(current_stream), // stream info - remove that + long(-1), // stream_id parsed from info above change to -2 + true // read all streams + ); + + std::string logMessage, logType; + + // TODO: add read from memory option + params.uri = videoPath; + logType = "file"; + logMessage = videoPath; + + // locals + std::vector audioFPS, videoFPS; + std::vector audioDuration, videoDuration, ccDuration, subsDuration; + std::vector audioTB, videoTB, ccTB, subsTB; + c10::Dict> audioMetadata; + c10::Dict> videoMetadata; + c10::Dict> ccMetadata; + c10::Dict> subsMetadata; + + // calback and metadata defined in struct + succeeded = decoder.init(params, std::move(callback), &metadata); + if (succeeded) { + for (const auto& header : metadata) { + double fps = double(header.fps); + double duration = double(header.duration) * 1e-6; // * timeBase; + + if (header.format.type == TYPE_VIDEO) { + videoFPS.push_back(fps); + videoDuration.push_back(duration); + } else if (header.format.type == TYPE_AUDIO) { + audioFPS.push_back(fps); + audioDuration.push_back(duration); + } else if (header.format.type == TYPE_CC) { + ccDuration.push_back(duration); + } else if (header.format.type == TYPE_SUBTITLE) { + subsDuration.push_back(duration); + }; + } + } + // audio + audioMetadata.insert("duration", audioDuration); + audioMetadata.insert("framerate", audioFPS); + // video + videoMetadata.insert("duration", videoDuration); + videoMetadata.insert("fps", videoFPS); + // subs + subsMetadata.insert("duration", subsDuration); + // cc + ccMetadata.insert("duration", ccDuration); + // put all to a data + streamsMetadata.insert("video", videoMetadata); + streamsMetadata.insert("audio", audioMetadata); + streamsMetadata.insert("subtitles", subsMetadata); + streamsMetadata.insert("cc", ccMetadata); + + succeeded = Video::setCurrentStream(stream); + LOG(INFO) << "\nDecoder inited with: " << succeeded << "\n"; + if (std::get<1>(current_stream) != -1) { + LOG(INFO) + << "Stream index set to " << std::get<1>(current_stream) + << ". If you encounter trouble, consider switching it to automatic stream discovery. \n"; + } +} // video + +bool Video::setCurrentStream(std::string stream = "video") { + if ((!stream.empty()) && (_parseStream(stream) != current_stream)) { + current_stream = _parseStream(stream); + } + + double ts = 0; + if (seekTS > 0) { + ts = seekTS; + } + + _getDecoderParams( + ts, // video start + 0, // headerOnly + std::get<0>(current_stream), // stream + long(std::get<1>( + current_stream)), // stream_id parsed from info above change to -2 + false // read all streams + ); + + // calback and metadata defined in Video.h + return (decoder.init(params, std::move(callback), &metadata)); +} + +std::tuple Video::getCurrentStream() const { + return current_stream; +} + +c10::Dict>> Video:: + getStreamMetadata() const { + return streamsMetadata; +} + +void Video::Seek(double ts) { + // initialize the class variables used for seeking and retrurn + _getDecoderParams( + ts, // video start + 0, // headerOnly + std::get<0>(current_stream), // stream + long(std::get<1>( + current_stream)), // stream_id parsed from info above change to -2 + false // read all streams + ); + + // calback and metadata defined in Video.h + succeeded = decoder.init(params, std::move(callback), &metadata); + LOG(INFO) << "Decoder init at seek " << succeeded << "\n"; +} + +std::tuple Video::Next() { + // if failing to decode simply return a null tensor (note, should we + // raise an exeption?) + double frame_pts_s; + torch::Tensor outFrame = torch::zeros({0}, torch::kByte); + + // decode single frame + DecoderOutputMessage out; + int64_t res = decoder.decode(&out, decoderTimeoutMs); + // if successfull + if (res == 0) { + frame_pts_s = double(double(out.header.pts) * 1e-6); + + auto header = out.header; + const auto& format = header.format; + + // initialize the output variables based on type + + if (format.type == TYPE_VIDEO) { + // note: this can potentially be optimized + // by having the global tensor that we fill at decode time + // (would avoid allocations) + int outHeight = format.format.video.height; + int outWidth = format.format.video.width; + int numChannels = 3; + outFrame = torch::zeros({outHeight, outWidth, numChannels}, torch::kByte); + fillVideoTensor(out, outFrame); + outFrame = outFrame.permute({2, 0, 1}); + + } else if (format.type == TYPE_AUDIO) { + int outAudioChannels = format.format.audio.channels; + int bytesPerSample = av_get_bytes_per_sample( + static_cast(format.format.audio.format)); + int frameSizeTotal = out.payload->length(); + + CHECK_EQ(frameSizeTotal % (outAudioChannels * bytesPerSample), 0); + int numAudioSamples = + frameSizeTotal / (outAudioChannels * bytesPerSample); + + outFrame = + torch::zeros({numAudioSamples, outAudioChannels}, torch::kFloat); + + fillAudioTensor(out, outFrame); + } + // currently not supporting other formats (will do soon) + + out.payload.reset(); + } else if (res == ENODATA) { + LOG(INFO) << "Decoder ran out of frames (ENODATA)\n"; + } else { + LOG(ERROR) << "Decoder failed with ERROR_CODE " << res; + } + + return std::make_tuple(outFrame, frame_pts_s); +} + +static auto registerVideo = + torch::class_