Clip small scales to fp16 min

Summary: When the FC output min/max range is very small, we want to enforce a cutoff on the scale parameter so that quantization generalizes better to future values that could fall outside the original range. Test Plan: More analysis of the output distributions can be found in N425166. An example workflow using fp16 min clipping is f240972205. Reviewed By: jspark1105 Differential Revision: D25681249 fbshipit-source-id: c4dfbd3ee823886afed06e6c2eccfc29d612f7e6
pytorch · Dec 24, 2020 · ec6de6a · ec6de6a
1 parent 89b4899
commit ec6de6a
Showing 1 changed file with 12 additions and 1 deletion.
diff --git a/caffe2/quantization/server/norm_minimization.cc b/caffe2/quantization/server/norm_minimization.cc
@@ -14,6 +14,10 @@ namespace dnnlowp {
 
 #undef NDEBUG
 
+// Use fp16_min as the small-scale cutoff because we don't want to use scales in the fp16 subnormal range.
+// This is to be consistent with the Glow and FakeLowP implementations for NNPI.
+constexpr float SMALL_SCALE_THRESHOLD = 6.1e-5f;
+
 static float
 GetNorm(float begin, float end, float density, NormMinimization::Kind kind) {
   float norm = 0;
@@ -57,7 +61,8 @@ TensorQuantizationParams NormMinimization::NonlinearQuantizationParamsSearch(
   vector<float> bins_f(dnnlowp::adjust_hist_to_include_zero(hist, &min, &max));
   int nbins = bins_f.size();
   float bin_width = (max - min) / nbins;
-  if (bin_width == 0) {
+  float scale = (max - min) / float((1 << precision) - 1);
+  if (bin_width == 0 || scale < SMALL_SCALE_THRESHOLD) {
     QuantizationFactory* qfactory = QuantizationFactory::GetDefaultInstance();
     return qfactory->ChooseQuantizationParams(
         min, max, precision, preserve_sparsity);
@@ -190,6 +195,12 @@ TensorQuantizationParams NormMinimization::ChooseQuantizationParams(
   int nbins = bins_f.size();
   float bin_width = (max - min) / nbins;
 
+  float scale = (max - min) / float((1 << precision) - 1);
+  if (bin_width == 0 || scale < SMALL_SCALE_THRESHOLD) {
+    QuantizationFactory* qfactory = QuantizationFactory::GetDefaultInstance();
+    return qfactory->ChooseQuantizationParams(
+        min, max, precision, preserve_sparsity);
+  }
   int dst_nbins = 1 << precision;
 
   int zero_bin = round(-min / bin_width);