rebase on "[TensorExpr] Fix lowering for aten::div."
Differential Revision: [D25130750](https://our.internmc.facebook.com/intern/diff/D25130750)

[ghstack-poisoned]
Mikhail Zolotukhin committed Nov 21, 2020
2 parents 946b4ab + 9554129 commit 63c23be
Showing 34 changed files with 228 additions and 99 deletions.
8 changes: 4 additions & 4 deletions .circleci/cimodel/data/windows_build_definitions.py
@@ -131,10 +131,10 @@ def TruePred(_):
WindowsJob(None, _VC2019, CudaVersion(10, 1)),
WindowsJob(1, _VC2019, CudaVersion(10, 1)),
WindowsJob(2, _VC2019, CudaVersion(10, 1)),
-# VS2019 CUDA-11.1
-WindowsJob(None, _VC2019, CudaVersion(11, 1)),
-WindowsJob(1, _VC2019, CudaVersion(11, 1), master_only_pred=TruePred),
-WindowsJob(2, _VC2019, CudaVersion(11, 1), master_only_pred=TruePred),
+# VS2019 CUDA-11.0
+WindowsJob(None, _VC2019, CudaVersion(11, 0)),
+WindowsJob(1, _VC2019, CudaVersion(11, 0), master_only_pred=TruePred),
+WindowsJob(2, _VC2019, CudaVersion(11, 0), master_only_pred=TruePred),
# VS2019 CPU-only
WindowsJob(None, _VC2019, None),
WindowsJob(1, _VC2019, None, master_only_pred=TruePred),
10 changes: 5 additions & 5 deletions .circleci/config.yml
@@ -7833,7 +7833,7 @@ workflows:
- pytorch_windows_build:
build_environment: pytorch-win-vs2019-cuda11-cudnn8-py3
cuda_version: "11"
-name: pytorch_windows_vs2019_py36_cuda11.1_build
+name: pytorch_windows_vs2019_py36_cuda11.0_build
python_version: "3.6"
use_cuda: "1"
vc_product: Community
@@ -7849,10 +7849,10 @@
- master
- /ci-all\/.*/
- /release\/.*/
-name: pytorch_windows_vs2019_py36_cuda11.1_test1
+name: pytorch_windows_vs2019_py36_cuda11.0_test1
python_version: "3.6"
requires:
-- pytorch_windows_vs2019_py36_cuda11.1_build
+- pytorch_windows_vs2019_py36_cuda11.0_build
test_name: pytorch-windows-test1
use_cuda: "1"
vc_product: Community
@@ -7868,10 +7868,10 @@
- master
- /ci-all\/.*/
- /release\/.*/
-name: pytorch_windows_vs2019_py36_cuda11.1_test2
+name: pytorch_windows_vs2019_py36_cuda11.0_test2
python_version: "3.6"
requires:
-- pytorch_windows_vs2019_py36_cuda11.1_build
+- pytorch_windows_vs2019_py36_cuda11.0_build
test_name: pytorch-windows-test2
use_cuda: "1"
vc_product: Community
6 changes: 3 additions & 3 deletions .circleci/scripts/windows_cuda_install.sh
@@ -7,10 +7,10 @@ if [[ "$CUDA_VERSION" == "10" ]]; then
msbuild_project_dir="CUDAVisualStudioIntegration/extras/visual_studio_integration/MSBuildExtensions"
cuda_install_packages="nvcc_10.1 cuobjdump_10.1 nvprune_10.1 cupti_10.1 cublas_10.1 cublas_dev_10.1 cudart_10.1 cufft_10.1 cufft_dev_10.1 curand_10.1 curand_dev_10.1 cusolver_10.1 cusolver_dev_10.1 cusparse_10.1 cusparse_dev_10.1 nvgraph_10.1 nvgraph_dev_10.1 npp_10.1 npp_dev_10.1 nvrtc_10.1 nvrtc_dev_10.1 nvml_dev_10.1"
elif [[ "$CUDA_VERSION" == "11" ]]; then
-cuda_complete_version="11.1"
-cuda_installer_name="cuda_11.1.0_456.43_win10"
+cuda_complete_version="11.0"
+cuda_installer_name="cuda_11.0.2_451.48_win10"
msbuild_project_dir="visual_studio_integration/CUDAVisualStudioIntegration/extras/visual_studio_integration/MSBuildExtensions"
-cuda_install_packages="nvcc_11.1 cuobjdump_11.1 nvprune_11.1 nvprof_11.1 cupti_11.1 cublas_11.1 cublas_dev_11.1 cudart_11.1 cufft_11.1 cufft_dev_11.1 curand_11.1 curand_dev_11.1 cusolver_11.1 cusolver_dev_11.1 cusparse_11.1 cusparse_dev_11.1 npp_11.1 npp_dev_11.1 nvrtc_11.1 nvrtc_dev_11.1 nvml_dev_11.1"
+cuda_install_packages="nvcc_11.0 cuobjdump_11.0 nvprune_11.0 nvprof_11.0 cupti_11.0 cublas_11.0 cublas_dev_11.0 cudart_11.0 cufft_11.0 cufft_dev_11.0 curand_11.0 curand_dev_11.0 cusolver_11.0 cusolver_dev_11.0 cusparse_11.0 cusparse_dev_11.0 npp_11.0 npp_dev_11.0 nvrtc_11.0 nvrtc_dev_11.0 nvml_dev_11.0"
else
echo "CUDA_VERSION $CUDA_VERSION is not supported yet"
exit 1
4 changes: 2 additions & 2 deletions .circleci/scripts/windows_cudnn_install.sh
@@ -5,8 +5,8 @@ if [[ "$CUDA_VERSION" == "10" ]]; then
cuda_complete_version="10.1"
cudnn_installer_name="cudnn-10.1-windows10-x64-v7.6.4.38"
elif [[ "$CUDA_VERSION" == "11" ]]; then
-cuda_complete_version="11.1"
-cudnn_installer_name="cudnn-11.1-windows-x64-v8.0.5.39"
+cuda_complete_version="11.0"
+cudnn_installer_name="cudnn-11.0-windows-x64-v8.0.4.30"
else
echo "CUDNN for CUDA_VERSION $CUDA_VERSION is not supported yet"
exit 1
4 changes: 2 additions & 2 deletions .jenkins/pytorch/win-test-helpers/build_pytorch.bat
@@ -58,8 +58,8 @@ goto cuda_build_common

:cuda_build_11

-set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.1
-set CUDA_PATH_V11_1=%CUDA_PATH%
+set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0
+set CUDA_PATH_V11_0=%CUDA_PATH%

goto cuda_build_common

2 changes: 1 addition & 1 deletion aten/src/ATen/DynamicLibrary.cpp
@@ -6,7 +6,7 @@
#include <dlfcn.h>
#include <libgen.h>
#else
-#include <Windows.h>
+#include <c10/util/win32-headers.h>
#endif

namespace at {
5 changes: 0 additions & 5 deletions aten/src/ATen/cuda/Exceptions.h
@@ -79,11 +79,6 @@ const char *cusparseGetErrorString(cusparseStatus_t status);

#define AT_CUDA_CHECK(EXPR) C10_CUDA_CHECK(EXPR)

-// This should be used directly after every kernel launch to ensure
-// the launch happened correctly and provide an early, close-to-source
-// diagnostic if it didn't.
-#define TORCH_CUDA_KERNEL_LAUNCH_CHECK() AT_CUDA_CHECK(cudaGetLastError())
-
// For CUDA Driver API
//
// This is here instead of in c10 because NVRTC is loaded dynamically via a stub
2 changes: 1 addition & 1 deletion aten/src/ATen/native/cpu/ReduceOpsKernel.cpp
@@ -209,7 +209,7 @@ static void norm_kernel_tensor_iterator_impl(
binary_kernel_reduce(
iter,
AbsMaxOps<scalar_t>(),
-scalar_t(std::numeric_limits<scalar_t>::min())
+scalar_t(0)
);
});
} else if (val == -INFINITY) {
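Why the new identity is correct: for ord = +inf the reduction takes the max over |x_i|, and std::numeric_limits<scalar_t>::min() is, for floating-point types, the smallest positive normal value rather than the most negative value, so it silently dominates an all-zero input. A standalone C++ illustration of the difference (toy code, not from this PR):

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <limits>

int main() {
  const float xs[] = {0.0f, -0.0f, 0.0f};
  // Old identity: smallest positive normal float (~1.18e-38), which no |x| here can reach.
  float bad = std::numeric_limits<float>::min();
  // New identity: 0 is a true identity for max(|x|), since |x| >= 0 always.
  float good = 0.0f;
  for (float x : xs) {
    bad = std::max(bad, std::fabs(x));
    good = std::max(good, std::fabs(x));
  }
  std::printf("%g vs %g\n", bad, good);  // 1.17549e-38 vs 0: only the latter is the inf-norm
}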
4 changes: 3 additions & 1 deletion aten/src/ATen/native/metal/mpscnn/MPSCNNOps.mm
@@ -152,7 +152,9 @@ Tensor max_pool2d(
strideInPixelsX:stride[0]
strideInPixelsY:stride[1]];
[pool setEdgeMode:MPSImageEdgeModeClamp];
-[pool setOffset:{.x = kernel_size[0] / 2, .y = kernel_size[1] / 2, .z = 0}];
+[pool setOffset:{.x = static_cast<NSInteger>(kernel_size[0] / 2),
+                 .y = static_cast<NSInteger>(kernel_size[1] / 2),
+                 .z = 0}];

int64_t oN = iN;
int64_t oC = iC;
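The casts are needed because a braced initializer list rejects implicit narrowing conversions, and the kernel_size elements are int64_t while the offset fields are NSInteger. A minimal repro of the rule in plain C++ (types simplified, not the Metal API):

#include <cstdint>

struct Offset { int x; int y; int z; };  // stand-in for the Metal offset struct

int main() {
  int64_t kernel_w = 3, kernel_h = 3;
  // Offset off{kernel_w / 2, kernel_h / 2, 0};  // ill-formed: narrowing int64_t -> int in {}
  Offset off{static_cast<int>(kernel_w / 2),
             static_cast<int>(kernel_h / 2),
             0};  // explicit casts satisfy the no-narrowing rule
  return off.x + off.y + off.z;
}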
4 changes: 2 additions & 2 deletions aten/src/TH/THAllocator.cpp
@@ -9,7 +9,7 @@

/* stuff for mapped files */
#ifdef _WIN32
-#include <windows.h>
+#include <c10/util/win32-headers.h>
#endif

#if defined(HAVE_MMAP)
@@ -333,7 +333,7 @@ typedef struct{
HANDLE handle;
HANDLE wait;
} ReleaseContext;
-static VOID CALLBACK WaitForReleaseHandle(PVOID lpParam, BOOLEAN TimerOrWaitFired)
+static void CALLBACK WaitForReleaseHandle(PVOID lpParam, BOOLEAN TimerOrWaitFired)
{
if (lpParam) {
ReleaseContext *ctx = (ReleaseContext *)lpParam;
3 changes: 3 additions & 0 deletions binaries/benchmark_helper.cc
@@ -19,6 +19,9 @@
#include <string>
#include <thread>
#ifdef _WIN32
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
#include <windows.h>
#include <psapi.h>
#endif
5 changes: 5 additions & 0 deletions c10/cuda/CUDAException.h
@@ -29,3 +29,8 @@
TORCH_WARN("CUDA warning: ", cudaGetErrorString(__err)); \
} \
} while (0)

+// This should be used directly after every kernel launch to ensure
+// the launch happened correctly and provide an early, close-to-source
+// diagnostic if it didn't.
+#define TORCH_CUDA_KERNEL_LAUNCH_CHECK() C10_CUDA_CHECK(cudaGetLastError())
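The launch-check macro moves from ATen's Exceptions.h (deleted above) into c10, now wrapping C10_CUDA_CHECK directly. A minimal sketch of the usage pattern the comment describes; the kernel and launch configuration below are hypothetical, not from this PR:

#include <cuda_runtime.h>
#include <c10/cuda/CUDAException.h>

// Hypothetical elementwise kernel, for illustration only.
__global__ void scale_kernel(float* data, float alpha, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    data[i] *= alpha;
  }
}

void scale(float* data, float alpha, int n, cudaStream_t stream) {
  const int threads = 256;
  const int blocks = (n + threads - 1) / threads;
  scale_kernel<<<blocks, threads, 0, stream>>>(data, alpha, n);
  // Surfaces invalid-configuration and similar launch failures right here,
  // instead of at some later, harder-to-attribute synchronization point.
  TORCH_CUDA_KERNEL_LAUNCH_CHECK();
}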
6 changes: 1 addition & 5 deletions c10/util/Backtrace.cpp
@@ -9,12 +9,8 @@
#include <vector>

#ifdef _MSC_VER
-#ifndef WIN32_LEAN_AND_MEAN
-#define WIN32_LEAN_AND_MEAN
-#endif
+#include <c10/util/win32-headers.h>
#include <iomanip>
-#include <Windows.h>
-#include <dbghelp.h>
#pragma comment(lib, "Dbghelp.lib")
#endif

4 changes: 4 additions & 0 deletions c10/util/C++17.h
@@ -24,6 +24,10 @@
#error You need C++14 to compile PyTorch
#endif

+#if defined(_WIN32) && (defined(min) || defined(max))
+# error Macro clash with min and max -- define NOMINMAX when compiling your program on Windows
+#endif

/*
* This header adds some polyfills with C++17 functionality
*/
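The new check turns a notoriously cryptic Windows build failure into an explicit diagnostic. A toy sketch of the failure mode it guards against (the #define below simulates what windows.h does when NOMINMAX is absent; not PyTorch code):

#include <algorithm>

#define min(a, b) (((a) < (b)) ? (a) : (b))  // simulating the windows.h macro

int main() {
  // int lo = std::min(1, 2);  // error: expands to std::(((1) < (2)) ? (1) : (2))
  int lo = (std::min)(1, 2);   // parenthesizing the name blocks macro expansion
  return lo;
}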
57 changes: 57 additions & 0 deletions c10/util/win32-headers.h
@@ -0,0 +1,57 @@
#pragma once

#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#ifndef NOMINMAX
#define NOMINMAX
#endif
#ifndef NOKERNEL
#define NOKERNEL
#endif
#ifndef NOUSER
#define NOUSER
#endif
#ifndef NOSERVICE
#define NOSERVICE
#endif
#ifndef NOSOUND
#define NOSOUND
#endif
#ifndef NOMCX
#define NOMCX
#endif
#ifndef NOGDI
#define NOGDI
#endif
#ifndef NOMSG
#define NOMSG
#endif
#ifndef NOMB
#define NOMB
#endif
#ifndef NOCLIPBOARD
#define NOCLIPBOARD
#endif

#include <windows.h>
#include <dbghelp.h>

#undef VOID
#undef DELETE
#undef IN
#undef THIS
#undef CONST
#undef NAN
#undef UNKNOWN
#undef NONE
#undef ANY
#undef IGNORE
#undef STRICT
#undef GetObject
#undef CreateSemaphore
#undef Yield
#undef RotateRight32
#undef RotateLeft32
#undef RotateRight64
#undef RotateLeft64
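This new header centralizes Windows.h hygiene that several files above previously duplicated inline: it trims what windows.h declares, then un-defines macros that collide with ordinary identifiers. A hedged usage sketch (hypothetical caller, not from this PR); because the wrapper #undefs VOID, callers must spell the return type with the plain keyword, which is exactly the VOID-to-void change in THAllocator.cpp above:

#include <c10/util/win32-headers.h>

// VOID is gone after the wrapper's #undef, so the Windows-style spelling
// of the return type would not compile; the C++ keyword is required.
static void CALLBACK OnHandleSignaled(PVOID lpParam, BOOLEAN TimerOrWaitFired) {
  // PVOID and BOOLEAN are genuine typedefs rather than macros, so they survive.
  (void)lpParam;
  (void)TimerOrWaitFired;
}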
4 changes: 2 additions & 2 deletions caffe2/operators/gather_ranges_to_dense_op.h
@@ -88,11 +88,11 @@ class GatherRangesToDenseOp final : public Operator<Context> {
CAFFE_ENFORCE_EQ(
ranges.size(1),
lengths_.size(),
"Nummber of ranges should match number of lengths");
"Number of ranges should match number of lengths");
CAFFE_ENFORCE_EQ(
ranges.size(1),
OutputSize(),
"Nummber of ranges should match number of outputs");
"Number of ranges should match number of outputs");
CAFFE_ENFORCE_EQ(
ranges.size(2), 2, "Ranges last dimension should be of size 2");

3 changes: 3 additions & 0 deletions caffe2/python/core.py
@@ -2343,6 +2343,9 @@ def make_builder(t):
)

def is_external_input(self, blob):
+if self._recreate_lookup_tables:
+    self._RecreateLookupTables()

name = str(blob)
return name in self._external_input_map

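is_external_input now refreshes the cached lookup tables when they are flagged stale before consulting _external_input_map. A hedged sketch of the dirty-flag pattern this follows (self-contained toy class, not the real caffe2 Net API):

class CachedLookup:
    """Toy illustration of the rebuild-before-read pattern used above."""

    def __init__(self):
        self._items = []                     # source of truth
        self._lookup = set()                 # derived cache
        self._recreate_lookup_tables = False

    def add(self, name):
        self._items.append(name)
        self._recreate_lookup_tables = True  # mark the cache stale; rebuild lazily

    def _RecreateLookupTables(self):
        self._lookup = set(self._items)
        self._recreate_lookup_tables = False

    def contains(self, name):
        # The fix above: refresh a stale cache before reading from it.
        if self._recreate_lookup_tables:
            self._RecreateLookupTables()
        return name in self._lookup


c = CachedLookup()
c.add("data")
assert c.contains("data")  # True: the stale cache is rebuilt before the lookup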
19 changes: 19 additions & 0 deletions docs/source/index.rst
@@ -10,6 +10,25 @@ PyTorch documentation

PyTorch is an optimized tensor library for deep learning using GPUs and CPUs.

+Features described in this documentation are classified by release status:
+
+*Stable:* These features will be maintained long-term and there should generally
+be no major performance limitations or gaps in documentation.
+We also expect to maintain backwards compatibility (although
+breaking changes can happen and notice will be given one release ahead
+of time).
+
+*Beta:* Features are tagged as Beta because the API may change based on
+user feedback, because the performance needs to improve, or because
+coverage across operators is not yet complete. For Beta features, we are
+committing to seeing the feature through to the Stable classification.
+We are not, however, committing to backwards compatibility.
+
+*Prototype:* These features are typically not available as part of
+binary distributions like PyPI or Conda, except sometimes behind run-time
+flags, and are at an early stage for feedback and testing.
+
+
.. toctree::
:glob:
:maxdepth: 1
3 changes: 3 additions & 0 deletions modules/observers/perf_observer.cc
@@ -18,6 +18,9 @@ defined(TARGET_IPHONE_SIMULATOR)
#endif

#ifdef _WIN32
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
#include <windows.h>
#endif

2 changes: 1 addition & 1 deletion test/jit/test_class_type.py
@@ -144,7 +144,7 @@ def test_conditional_set_attr(self):
@torch.jit.script
class FooTest(object):
def __init__(self, x):
-if True:
+if 1 == 1:
self.attr = x

def test_class_type_as_param(self):
22 changes: 11 additions & 11 deletions test/jit/test_list_dict.py
@@ -46,52 +46,52 @@ def str_in(x):
def test_list_literal(self):
def reassign():
x = [1]
-if True:
+if 1 == 1:
x = [2, 3]
return
self.checkScript(reassign, (), optimize=False)

def reassign_arity_change():
x = [1]
-if True:
+if 1 == 1:
x = [1, 2, 3]
return
self.checkScript(reassign_arity_change, (), optimize=False)

def reassign_from_empty_literal():
x = []
-if True:
+if 1 == 1:
x = [1, 2, 3]
return
with self.assertRaisesRegex(RuntimeError, r"previously has type List\[Tensor\]"):
self.checkScript(reassign_from_empty_literal, (), optimize=False)

def reassign_from_empty_builtin():
x = torch.jit.annotate(List[int], [])
-if True:
+if 1 == 1:
x = [1, 2, 3]
y = torch.jit.annotate(List[float], [])
-if True:
+if 1 == 1:
y = [1.0, 2.0, 3.0]
z = []
-if True:
+if 1 == 1:
z = [torch.randn([1])]
return
self.checkScript(reassign_from_empty_builtin, (), optimize=False)

def reassign_bad_type():
x = [1]
-if True:
+if 1 == 1:
x = [1.0]
return
with self.assertRaisesRegex(RuntimeError, "previously has type"):
self.checkScript(reassign_bad_type, (), optimize=False)

def reassign_nested():
x = torch.jit.annotate(List[int], [])
-if True:
+if 1 == 1:
x = [1, 2, 3]
-if True:
+if 1 == 1:
x = [1.0]
return
with self.assertRaisesRegex(RuntimeError, "previously has type"):
@@ -554,15 +554,15 @@ def test_append_2():
def test_mutable_list_append_if(self):
def test_append_if():
a = [1]
-if True:
+if 1 == 1:
a.append(4)
return a == [1, 4]
self.checkScript(test_append_if, ())

def test_mutable_list_append_if_else(self):
def test_append_if_else():
a = [1]
-if False:
+if 1 == 2:
a.append(4)
else:
a.append(10)
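Throughout these tests, literal conditions (if True: / if False:) become runtime comparisons (if 1 == 1: / if 1 == 2:). A plausible reading, not stated in the commit, is that the TorchScript compiler resolves literal boolean conditions statically and compiles only the taken branch, so a runtime-evaluated condition is needed to keep genuine control flow, and the type unification at the branch join, under test. A small scripted sketch of the construct being exercised:

import torch

@torch.jit.script
def append_if() -> bool:
    a = [1]
    if 1 == 1:  # evaluated at runtime, so both paths participate in compilation
        a.append(4)
    return a == [1, 4]

print(append_if())  # True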
