[te] Fix casting of unsigned char, and abs(int) #44157

Closed
wants to merge 7 commits
49 changes: 40 additions & 9 deletions test/test_jit_fuser_te.py
@@ -87,7 +87,7 @@ def get_nodes_and_parents_recursively(block, kind, acc):
acc[block].append(node)
elif node.kind() == 'prim::DifferentiableGraph':
get_nodes_and_parents_recursively(node.g('Subgraph'), kind, acc)
elif node.kind() == 'prim::If' and (node.inputs().__next__().node().kind() == 'aten::all' or
elif node.kind() == 'prim::If' and (node.inputs().__next__().node().kind() == 'aten::all' or
node.inputs().__next__().node().kind() == 'prim::TypeCheck'):
get_nodes_and_parents_recursively(node.blocks().__next__(), kind, acc)
else:
@@ -1151,7 +1151,7 @@ def rand(dtype, device="cuda"):
return torch.rand(shape, dtype=dtype, device=device)
else:
# dtype is an integer.
return torch.randint(0, 100, shape, dtype=dtype, device=device)
return torch.randint(1, 4, shape, dtype=dtype, device=device)
raise RuntimeError("Unhandled dtype")

dtypes = [
@@ -1160,12 +1160,12 @@ def rand(dtype, device="cuda"):
torch.int16,
torch.int32,
torch.int64,
torch.float16,
# torch.float16,

Nit: maybe add a comment on whether we want these to be supported at all?

Contributor Author

Oh, yeah, float16 should be coming in soon-ish. The commented-out lines are a transient state for this stack :-)

# torch.bfloat16,
torch.float32,
torch.float64,
torch.bfloat16,
torch.bool,
torch.complex32,
# torch.bool,
# torch.complex32,
# torch.complex64,
# torch.complex128,
# torch.qint8,
@@ -1175,6 +1175,32 @@ def rand(dtype, device="cuda"):
unary_ops = [
torch.sigmoid,
torch.reciprocal,
torch.neg,
torch.relu,
torch.log,
torch.log10,
torch.log2,
torch.exp,
torch.expm1,
torch.erf,
torch.erfc,
torch.cos,
torch.sin,
torch.tan,
torch.acos,
torch.asin,
torch.cosh,
torch.sinh,
torch.atan,
torch.tanh,
torch.sqrt,
torch.rsqrt,
torch.abs,
torch.ceil,
torch.floor,
torch.round,
torch.trunc,
torch.frac,
]
devices = [
"cuda",
@@ -1189,9 +1215,14 @@ def rand(dtype, device="cuda"):
# neither does the fuser. Catch everything to avoid needing to
# guess what errors might be thrown by eager.
continue
t = torch.jit.trace(fn, (x,))
self.assertEqual(ref, t(x))
self.assertAllFused(t.graph_for(x))
try:
t = torch.jit.trace(fn, (x,))
torch.testing.assert_allclose(ref, t(x))
self.assertAllFused(t.graph_for(x))
except Exception as e:
raise RuntimeError(" ".join([
"Failed:", str(dtype), op.__name__, device
]))

if __name__ == '__main__':
run_tests()
7 changes: 5 additions & 2 deletions torch/csrc/jit/tensorexpr/cuda_codegen.cpp
@@ -110,7 +110,7 @@ std::string cudaDtypeCppString(const Dtype& dtype) {
case ScalarType::Short:
return "short";
case ScalarType::Long:
return "long";
return "long long";
default:
return dtype.ToCppString();
}
@@ -198,7 +198,7 @@ void CudaPrinter::visit(const Cast* v) {
return;
}

os() << cudaDtypeCppString(v->dtype());
os() << "(" << cudaDtypeCppString(v->dtype()) << ")";

Ouch, how did it work? :)

Contributor Author

I don't know when it became a thing, but apparently you can cast by writing float(x), as if it were a constructor. That doesn't parse if you write unsigned char(x), though. Clearly we need whitespace in identifiers! (I hope this reddit post is a joke: https://www.reddit.com/r/ProgrammerHumor/comments/4on81i/til_c_allows_u200b_zero_width_space_in_identifiers/ )
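
For context, here's a minimal host-C++ sketch (illustration only, not part of this PR) of the syntax issue: functional-style casts only accept single-word type names, so the codegen switches to C-style casts, which work for any type name.

```cpp
// Illustration only (not PR code): functional-style casts need a single-word
// type name, which is why the codegen now emits C-style casts instead.
int main() {
  int x = 300;

  float f = float(x);                    // OK: functional-style cast
  // unsigned char c = unsigned char(x); // ill-formed: multi-word type name
  unsigned char c = (unsigned char)(x);  // OK: C-style cast works for any type
  unsigned char c2 = static_cast<unsigned char>(x);  // also fine in host C++

  return (f == 300.0f && c == c2) ? 0 : 1;
}
```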

os() << "(";
v->src_value()->accept(this);
os() << ")";
@@ -221,6 +221,9 @@ void CudaPrinter::visit(const Intrinsics* v) {
if (returnType == ScalarType::Half || returnType == ScalarType::Float) {
func_name = func_name + "f";
}
if (v->op_type() == IntrinsicsOp::kFabs && is_integral(returnType)) {

Collaborator

Does nvrtc not handle std::abs or ::abs? All this exp/expf business is so last decade (and it isn't used in the eager part of the codebase at all).

Contributor Author

I think it does, actually. I don't know why we went the + "f" route, except that maybe the old fuser uses it? I'll take a note to use std:: in a follow-up.
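
For reference, a small sketch (illustration only, plain host C++ rather than NVRTC, and not part of this PR) of the overload issue under discussion: fabs is floating-point only, the C-style abs handles int, and std::abs resolves to the right overload for both, which is what the kFabs-to-abs mapping above works around.

```cpp
// Illustration only (not PR code): the C-style math functions are split by
// type, while std::abs is overloaded and picks the right version itself.
#include <cmath>    // fabs, std::abs(double)
#include <cstdlib>  // abs(int), std::abs(int)
#include <iostream>

int main() {
  int i = -3;
  double d = -3.5;

  std::cout << fabs(i) << "\n";      // 3   (int silently converted to double)
  std::cout << abs(i) << "\n";       // 3   (integer abs)
  std::cout << std::abs(i) << "\n";  // 3   (overload resolution picks int)
  std::cout << std::abs(d) << "\n";  // 3.5 (overload resolution picks double)
  return 0;
}
```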

func_name = "abs";
}

os() << func_name << "(";
for (int i = 0; i < v->nparams(); i++) {