Skip to content

Commit

Permalink
Update on "[quant][fix] Fix quant type classification for float_qparam qconfig"
Browse files Browse the repository at this point in the history

Summary:
Also renamed float_qparam_dynamic_qconfig to float_qparam_weight_only_qconfig.
It's not used in user code yet, so we only need to update the tests.

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:

Differential Revision: [D25010175](https://our.internmc.facebook.com/intern/diff/D25010175)

[ghstack-poisoned]
  • Loading branch information
jerryzh168 committed Nov 17, 2020
2 parents 45bb4cb + 74da28b commit 618459f
Show file tree
Hide file tree
Showing 18 changed files with 594 additions and 66 deletions.
18 changes: 7 additions & 11 deletions .circleci/cimodel/data/binary_build_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ def get_processor_arch_name(gpu_version):
"cu" + gpu_version.strip("cuda") if gpu_version.startswith("cuda") else gpu_version
)

# TODO: There's an issue with current Python 3.9 builds that only occurs during
# windows builds, so don't build 3.9 for windows until it is resolved.
# (Python 3.9 IS built for linux and macos; this list is used for windows only.)
PYTHON_NO_39 = [
v for v in dimensions.STANDARD_PYTHON_VERSIONS if v not in ['3.9']
]

LINUX_PACKAGE_VARIANTS = OrderedDict(
manywheel=[
Expand All @@ -44,18 +47,11 @@ def get_processor_arch_name(gpu_version):
],
)

# TODO: There's an issue with current Python 3.9 builds that only occurs during
# windows builds, let's just not build 3.9 for windows and figure out how
# to resolve afterwards
PYTHON_VERSIONS_NO_39 = [
v for v in dimensions.STANDARD_PYTHON_VERSIONS if v not in ['3.9']
]

CONFIG_TREE_DATA = OrderedDict(
linux=(dimensions.GPU_VERSIONS, LINUX_PACKAGE_VARIANTS),
macos=([None], OrderedDict(
wheel=PYTHON_VERSIONS_NO_39,
conda=PYTHON_VERSIONS_NO_39,
wheel=dimensions.STANDARD_PYTHON_VERSIONS,
conda=dimensions.STANDARD_PYTHON_VERSIONS,
libtorch=[
"3.7",
],
Expand All @@ -64,8 +60,8 @@ def get_processor_arch_name(gpu_version):
windows=(
[v for v in dimensions.GPU_VERSIONS if v not in ['cuda92'] + dimensions.ROCM_VERSION_LABELS],
OrderedDict(
wheel=PYTHON_VERSIONS_NO_39,
conda=PYTHON_VERSIONS_NO_39,
wheel=PYTHON_NO_39,
conda=PYTHON_NO_39,
libtorch=[
"3.7",
],
Expand Down
66 changes: 66 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2990,6 +2990,16 @@ workflows:
tags:
only:
- /v[0-9]+(\.[0-9]+)*-rc[0-9]+/
- binary_mac_build:
name: binary_macos_wheel_3_9_cpu_nightly_build
build_environment: "wheel 3.9 cpu"
filters:
branches:
only:
- /.*/
tags:
only:
- /v[0-9]+(\.[0-9]+)*-rc[0-9]+/
- binary_mac_build:
name: binary_macos_conda_3_6_cpu_nightly_build
build_environment: "conda 3.6 cpu"
Expand Down Expand Up @@ -3020,6 +3030,16 @@ workflows:
tags:
only:
- /v[0-9]+(\.[0-9]+)*-rc[0-9]+/
- binary_mac_build:
name: binary_macos_conda_3_9_cpu_nightly_build
build_environment: "conda 3.9 cpu"
filters:
branches:
only:
- /.*/
tags:
only:
- /v[0-9]+(\.[0-9]+)*-rc[0-9]+/
- binary_mac_build:
name: binary_macos_libtorch_3_7_cpu_nightly_build
build_environment: "libtorch 3.7 cpu"
Expand Down Expand Up @@ -6360,6 +6380,20 @@ workflows:
- /v[0-9]+(\.[0-9]+)*-rc[0-9]+/
package_type: wheel
upload_subfolder: cpu
- binary_upload:
name: binary_macos_wheel_3_9_cpu_nightly_upload
context: org-member
requires:
- binary_macos_wheel_3_9_cpu_nightly_build
filters:
branches:
only:
- nightly
tags:
only:
- /v[0-9]+(\.[0-9]+)*-rc[0-9]+/
package_type: wheel
upload_subfolder: cpu
- binary_upload:
name: binary_macos_conda_3_6_cpu_nightly_upload
context: org-member
Expand Down Expand Up @@ -6402,6 +6436,20 @@ workflows:
- /v[0-9]+(\.[0-9]+)*-rc[0-9]+/
package_type: conda
upload_subfolder: cpu
- binary_upload:
name: binary_macos_conda_3_9_cpu_nightly_upload
context: org-member
requires:
- binary_macos_conda_3_9_cpu_nightly_build
filters:
branches:
only:
- nightly
tags:
only:
- /v[0-9]+(\.[0-9]+)*-rc[0-9]+/
package_type: conda
upload_subfolder: cpu
- binary_upload:
name: binary_macos_libtorch_3_7_cpu_nightly_upload
context: org-member
Expand Down Expand Up @@ -8990,6 +9038,15 @@ workflows:
branches:
only:
- postnightly
- smoke_mac_test:
name: smoke_macos_wheel_3_9_cpu_nightly
build_environment: "wheel 3.9 cpu"
requires:
- update_s3_htmls
filters:
branches:
only:
- postnightly
- smoke_mac_test:
name: smoke_macos_conda_3_6_cpu_nightly
build_environment: "conda 3.6 cpu"
Expand Down Expand Up @@ -9017,6 +9074,15 @@ workflows:
branches:
only:
- postnightly
- smoke_mac_test:
name: smoke_macos_conda_3_9_cpu_nightly
build_environment: "conda 3.9 cpu"
requires:
- update_s3_htmls
filters:
branches:
only:
- postnightly
- smoke_mac_test:
name: smoke_macos_libtorch_3_7_cpu_nightly
build_environment: "libtorch 3.7 cpu"
Expand Down
4 changes: 2 additions & 2 deletions .circleci/scripts/binary_macos_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ if [[ "$PACKAGE_TYPE" == libtorch ]]; then
unzip "$pkg" -d /tmp
cd /tmp/libtorch
elif [[ "$PACKAGE_TYPE" == conda ]]; then
conda install -y "$pkg" --offline
conda install -y "$pkg"
else
pip install "$pkg" --no-index --no-dependencies -v
pip install "$pkg" -v
fi

# Test
Expand Down
2 changes: 1 addition & 1 deletion aten/src/ATen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ endif()

# Metal
if(USE_PYTORCH_METAL)
if(IOS)
if(APPLE)
set(all_cpu_cpp ${all_cpu_cpp} ${metal_cpp} ${native_metal_srcs})
else()
# Add files needed from optimized_for_mobile
Expand Down
61 changes: 46 additions & 15 deletions aten/src/ATen/native/ReflectionPad.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,23 +346,43 @@ void reflection_pad2d_out_template(
if (input.ndimension() == 3) {
/* resize output */
output.resize_({nplane, output_h, output_w});
AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "reflection_pad2d", [&] {
reflection_pad2d_out_frame(
input.data_ptr<scalar_t>(), output.data_ptr<scalar_t>(),
nplane,
input_w, input_h, output_w, output_h,
pad_l, pad_t);
});
if (input.is_quantized()) {
AT_DISPATCH_QINT_TYPES(input.scalar_type(), "qreflection_pad2d", [&] {
reflection_pad2d_out_frame(
input.data_ptr<scalar_t>(), output.data_ptr<scalar_t>(),
nplane,
input_w, input_h, output_w, output_h,
pad_l, pad_t);
});
} else {
AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "reflection_pad2d", [&] {
reflection_pad2d_out_frame(
input.data_ptr<scalar_t>(), output.data_ptr<scalar_t>(),
nplane,
input_w, input_h, output_w, output_h,
pad_l, pad_t);
});
}
} else {
/* resize output */
output.resize_({nbatch, nplane, output_h, output_w});
AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "reflection_pad2d", [&] {
reflection_pad2d_out_loop(
input.data_ptr<scalar_t>(), output.data_ptr<scalar_t>(),
nbatch, nplane,
input_w, input_h, output_w, output_h,
pad_l, pad_t);
});
if (input.is_quantized()) {
AT_DISPATCH_QINT_TYPES(input.scalar_type(), "qreflection_pad2d", [&] {
reflection_pad2d_out_loop(
input.data_ptr<scalar_t>(), output.data_ptr<scalar_t>(),
nbatch, nplane,
input_w, input_h, output_w, output_h,
pad_l, pad_t);
});
} else {
AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "reflection_pad2d", [&] {
reflection_pad2d_out_loop(
input.data_ptr<scalar_t>(), output.data_ptr<scalar_t>(),
nbatch, nplane,
input_w, input_h, output_w, output_h,
pad_l, pad_t);
});
}
}
}

Expand Down Expand Up @@ -547,7 +567,18 @@ Tensor& reflection_pad2d_out_cpu(
}

Tensor reflection_pad2d_cpu(const Tensor& input, IntArrayRef padding) {
auto output = at::empty({0}, input.options());
Tensor output;
if (input.is_quantized()) {
if (input.qscheme() == kPerTensorAffine) {
output = at::_empty_affine_quantized({0}, input.options(),
input.q_scale(),
input.q_zero_point());
} else {
TORCH_CHECK(false, "Only per tensor quantization is supported");
}
} else {
output = at::empty({0}, input.options());
}
reflection_pad2d_out_template(output, input, padding);
return output;
}
Expand Down
10 changes: 5 additions & 5 deletions aten/src/ATen/native/metal/MetalPrepackOpRegister.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include <ATen/ATen.h>


#if defined(C10_IOS)
#if (C10_IOS || TARGET_OS_MAC)
#import <ATen/native/metal/mpscnn/MPSCNNOps.h>
#endif

Expand Down Expand Up @@ -94,19 +94,19 @@ c10::intrusive_ptr<Conv2dOpContext> conv2d_prepack(
Tensor conv2d_prepack_run(
const Tensor& input,
const c10::intrusive_ptr<Conv2dOpContext>& op_context) {
#if defined(C10_IOS)
#if (C10_IOS || TARGET_OS_MAC)
return mpscnn::conv2d(input, *op_context);
#else
TORCH_CHECK(false, "conv2d_prepack_run can only be invoked on iOS");
TORCH_CHECK(false, "conv2d_prepack_run can only be invoked on iOS and MacOS");
return input;
#endif
}

Tensor copy_to_host(const Tensor& input) {
#if defined(C10_IOS)
#if (C10_IOS || TARGET_OS_MAC)
return mpscnn::copy_to_host(input);
#else
TORCH_CHECK(false, "copy_to_host can only be invoked on iOS");
TORCH_CHECK(false, "copy_to_host can only be invoked on iOS and MacOS");
return input;
#endif
}
Expand Down
28 changes: 23 additions & 5 deletions aten/src/ATen/native/metal/mpscnn/MPSCNNContext.mm
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
#include <torch/script.h>
#include <mutex>

#if defined(C10_IOS)
#if C10_IOS
#import <UIKit/UIKit.h>
#elif TARGET_OS_MAC
#import <Foundation/NSProcessInfo.h>
#endif

@implementation MPSCNNContext {
Expand All @@ -31,10 +33,11 @@ + (instancetype)sharedInstance {
}

- (BOOL)available {
#if defined(C10_IOS)
#if TARGET_IPHONE_SIMULATOR
#if !defined(__APPLE__)
return false;
#else
#elif TARGET_IPHONE_SIMULATOR
return false;
#elif TARGET_OS_IPHONE
if (!MPSSupportsMTLDevice(_device)) {
return false;
}
Expand All @@ -45,8 +48,23 @@ - (BOOL)available {
supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily3_v2]) {
return false;
}
#elif TARGET_OS_MAC
if (!MPSSupportsMTLDevice(_device)) {
return false;
}
NSOperatingSystemVersion supportedVer = {10, 13, 0};
if (![[NSProcessInfo processInfo]
isOperatingSystemAtLeastVersion:supportedVer]) {
return false;
}
if (![MTLCreateSystemDefaultDevice()
supportsFeatureSet:MTLFeatureSet_macOS_GPUFamily1_v3]) {
return false;
}
#else
return false;
#endif
#endif

return _device && _library && _commandQueue;
}

Expand Down
9 changes: 4 additions & 5 deletions aten/src/ATen/native/native_functions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8065,16 +8065,15 @@
- func: reflection_pad1d.out(Tensor self, int[2] padding, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
dispatch:
CPU: reflection_pad1d_out_cpu
CPU, QuantizedCPU: reflection_pad1d_out_cpu
CUDA: reflection_pad1d_out_cuda

- func: reflection_pad1d(Tensor self, int[2] padding) -> Tensor
use_c10_dispatcher: full
python_module: nn
dispatch:
CPU: reflection_pad1d_cpu
CPU, QuantizedCPU: reflection_pad1d_cpu
CUDA: reflection_pad1d_cuda
QuantizedCPU: reflection_pad1d_cpu

- func: reflection_pad1d_backward.grad_input(Tensor grad_output, Tensor self, int[2] padding, *, Tensor(a!) grad_input) -> Tensor(a!)
python_module: nn
Expand All @@ -8092,14 +8091,14 @@
- func: reflection_pad2d.out(Tensor self, int[4] padding, *, Tensor(a!) out) -> Tensor(a!)
python_module: nn
dispatch:
CPU: reflection_pad2d_out_cpu
CPU, QuantizedCPU: reflection_pad2d_out_cpu
CUDA: reflection_pad2d_out_cuda

- func: reflection_pad2d(Tensor self, int[4] padding) -> Tensor
use_c10_dispatcher: full
python_module: nn
dispatch:
CPU: reflection_pad2d_cpu
CPU, QuantizedCPU: reflection_pad2d_cpu
CUDA: reflection_pad2d_cuda

- func: reflection_pad2d_backward.grad_input(Tensor grad_output, Tensor self, int[4] padding, *, Tensor(a!) grad_input) -> Tensor(a!)
Expand Down
16 changes: 16 additions & 0 deletions benchmarks/static_runtime/deep_wide_pt.cc
Original file line number Diff line number Diff line change
Expand Up @@ -112,3 +112,19 @@ torch::jit::Module getLeakyReLUConstScriptModel() {
module.define(leaky_relu_model_const);
return module;
}

// TorchScript source for a benchmark model with a longer chain of dependent
// elementwise multiply + relu ops; consumed by getLongScriptModel() below.
const std::string long_model = R"JIT(
def forward(self, a, b, c):
d = torch.relu(a * b)
e = torch.relu(a * c)
f = torch.relu(e * d)
g = torch.relu(f * f)
h = torch.relu(g * c)
return h
)JIT";

// Builds a scripted module named "m" whose forward() is compiled from the
// `long_model` TorchScript source defined above.
torch::jit::Module getLongScriptModel() {
  torch::jit::Module scripted("m");
  scripted.define(long_model);
  return scripted;
}
2 changes: 2 additions & 0 deletions benchmarks/static_runtime/deep_wide_pt.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,5 @@ torch::jit::Module getTrivialScriptModel();
torch::jit::Module getLeakyReLUScriptModel();

torch::jit::Module getLeakyReLUConstScriptModel();

torch::jit::Module getLongScriptModel();

0 comments on commit 618459f

Please sign in to comment.