From 50d1823867079d376e4d15cb462ebc2b24c4d569 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Sun, 5 Aug 2018 20:57:59 -0400 Subject: [PATCH 01/14] call inline pass in get_ir_of_code --- numba/ir_utils.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/numba/ir_utils.py b/numba/ir_utils.py index 2e681c8c632..ea84f8c5878 100644 --- a/numba/ir_utils.py +++ b/numba/ir_utils.py @@ -11,7 +11,8 @@ import numba from numba.six import exec_ -from numba import ir, types, typing, config, analysis, utils, cgutils, rewrites +from numba import (ir, types, typing, config, analysis, utils, cgutils, + rewrites, postproc) from numba.typing.templates import signature, infer_global, AbstractTemplate from numba.targets.imputils import impl_ret_untracked from numba.analysis import (compute_live_map, compute_use_defs, @@ -1561,6 +1562,12 @@ def __init__(self, f_ir): self.calltypes = None rewrites.rewrite_registry.apply('before-inference', DummyPipeline(ir), ir) + # call inline pass to handle cases like stencils and comprehensions + inline_pass = numba.inline_closurecall.InlineClosureCallPass( + ir, numba.targets.cpu.ParallelOptions(False)) + inline_pass.run() + post_proc = postproc.PostProcessor(ir) + post_proc.run() return ir def replace_arg_nodes(block, args): From a4f7100b59492fa762bdc2920050297b48f4f176 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Sun, 5 Aug 2018 20:59:48 -0400 Subject: [PATCH 02/14] infer constant index in stencils for simple exprs like -c --- numba/stencilparfor.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/numba/stencilparfor.py b/numba/stencilparfor.py index aa906db2b79..ddcaf7e39ff 100644 --- a/numba/stencilparfor.py +++ b/numba/stencilparfor.py @@ -17,7 +17,7 @@ from numba import ir_utils, ir, utils, config, typing from numba.ir_utils import (get_call_table, mk_unique_var, compile_to_numba_ir, replace_arg_nodes, guard, - find_callname) + find_callname, require) from 
numba.six import exec_ @@ -426,6 +426,12 @@ def _replace_stencil_accesses(self, stencil_blocks, parfor_vars, in_args, else: if hasattr(index_list, 'name') and index_list.name in tuple_table: index_list = tuple_table[index_list.name] + # indices can be inferred as constant in simple expressions + # like -c where c is constant + # handled here since this is a common stencil index pattern + kernel_defs = ir_utils.build_definitions(stencil_blocks) + index_list = [_get_const_index_expr( + kernel_defs, self.func_ir, v) for v in index_list] if index_offsets: index_list = self._add_index_offsets(index_list, list(index_offsets), new_body, scope, loc) @@ -674,3 +680,26 @@ def __init__(self, typingctx, targetctx, args, f_ir): self.typemap = None self.return_type = None self.calltypes = None + + +def _get_const_index_expr(kernel_defs, func_ir, index_var): + """ + infer index_var as constant if it is of the form -c where c is a constant + in the outer function. index_var is assumed to be inside stencil kernel + """ + try: + # match definition inner_var = unary(index_var) + var_def_list = kernel_defs[index_var.name] + require(len(var_def_list) == 1) + var_def = var_def_list[0] + require(isinstance(var_def, ir.Expr) and var_def.op == 'unary') + inner_var = var_def.value + # return -c as constant + const_val = ir_utils.find_const(func_ir, inner_var) + if var_def.fn == '+': + return const_val + elif var_def.fn == '-': + return -const_val + except ir_utils.GuardException: + return index_var + return index_var From ffe579b82d08a29252b6cb13ca1c9b4fae29e8a5 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Sun, 5 Aug 2018 21:13:03 -0400 Subject: [PATCH 03/14] check stencil index to be var --- numba/stencilparfor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/numba/stencilparfor.py b/numba/stencilparfor.py index ddcaf7e39ff..1611f9e2be5 100644 --- a/numba/stencilparfor.py +++ b/numba/stencilparfor.py @@ -688,6 +688,7 @@ def _get_const_index_expr(kernel_defs, func_ir, index_var): 
in the outer function. index_var is assumed to be inside stencil kernel """ try: + require(isinstance(index_var, ir.Var)) # match definition inner_var = unary(index_var) var_def_list = kernel_defs[index_var.name] require(len(var_def_list) == 1) From 57d36564477f5f5786f79f3caff0e1420afdef27 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Sun, 5 Aug 2018 21:17:27 -0400 Subject: [PATCH 04/14] handle stencil const index not in expr --- numba/stencilparfor.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/numba/stencilparfor.py b/numba/stencilparfor.py index 1611f9e2be5..500e367e3b8 100644 --- a/numba/stencilparfor.py +++ b/numba/stencilparfor.py @@ -689,6 +689,10 @@ def _get_const_index_expr(kernel_defs, func_ir, index_var): """ try: require(isinstance(index_var, ir.Var)) + # case where the index is a const itself in outer function + var_const = guard(ir_utils.find_const, func_ir, index_var) + if var_const is not None: + return var_const # match definition inner_var = unary(index_var) var_def_list = kernel_defs[index_var.name] require(len(var_def_list) == 1) From d1e88ac1751dbfb16eaa9657d66a5191f6203d8e Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Sun, 5 Aug 2018 21:19:39 -0400 Subject: [PATCH 05/14] test stencil index constant inference --- numba/tests/test_stencils.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/numba/tests/test_stencils.py b/numba/tests/test_stencils.py index 6c67d0f3931..3b68dce7108 100644 --- a/numba/tests/test_stencils.py +++ b/numba/tests/test_stencils.py @@ -427,6 +427,37 @@ def test_impl_seq(n): n = 100 self.check(test_impl_seq, test_impl, n) + @skip_unsupported + @tag('important') + def test_stencil_call_const(self): + """Tests numba.stencil call that has an index that can be inferred as + constant from a unary expr. Otherwise, this would raise an error since + neighborhood length is not specified. 
+ """ + def test_impl(n): + A = np.arange(n) + B = np.zeros(n) + c = 1 + numba.stencil(lambda a,c : 0.3 * (a[-c] + a[0] + a[c]))( + A, c, out=B) + return B + + def test_impl_seq(n): + A = np.arange(n) + B = np.zeros(n) + c = 1 + for i in range(1, n - 1): + B[i] = 0.3 * (A[i - c] + A[i] + A[i + c]) + return B + + n = 100 + # constant inference is only possible in parallel path + cpfunc = self.compile_parallel(test_impl, (types.intp,)) + expected = test_impl_seq(n) + # parfor result + parfor_output = cpfunc.entry_point(n) + np.testing.assert_almost_equal(parfor_output, expected, decimal=3) + @skip_unsupported @tag('important') def test_stencil_parallel_off(self): From 7eb1fd27db3c2937d2b17e4aa0a3ee797432e21e Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Tue, 7 Aug 2018 10:06:42 -0400 Subject: [PATCH 06/14] refactor const index matching to use stencil IR --- numba/stencilparfor.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/numba/stencilparfor.py b/numba/stencilparfor.py index 500e367e3b8..f9088e964ad 100644 --- a/numba/stencilparfor.py +++ b/numba/stencilparfor.py @@ -166,7 +166,7 @@ def _mk_stencil_parfor(self, label, in_args, out_arr, stencil_ir, parfor_vars.append(parfor_var) start_lengths, end_lengths = self._replace_stencil_accesses( - stencil_blocks, parfor_vars, in_args, index_offsets, stencil_func, + stencil_ir, parfor_vars, in_args, index_offsets, stencil_func, arg_to_arr_dict) if config.DEBUG_ARRAY_OPT == 1: @@ -350,12 +350,13 @@ def get_start_ind(s_length): ret_var = block.body[-2].value.value return ret_var - def _replace_stencil_accesses(self, stencil_blocks, parfor_vars, in_args, + def _replace_stencil_accesses(self, stencil_ir, parfor_vars, in_args, index_offsets, stencil_func, arg_to_arr_dict): """ Convert relative indexing in the stencil kernel to standard indexing by adding the loop index variables to the corresponding dimensions of the array index tuples. 
""" + stencil_blocks = stencil_ir.blocks in_arr = in_args[0] in_arg_names = [x.name for x in in_args] @@ -429,9 +430,9 @@ def _replace_stencil_accesses(self, stencil_blocks, parfor_vars, in_args, # indices can be inferred as constant in simple expressions # like -c where c is constant # handled here since this is a common stencil index pattern - kernel_defs = ir_utils.build_definitions(stencil_blocks) + stencil_ir._definitions = ir_utils.build_definitions(stencil_blocks) index_list = [_get_const_index_expr( - kernel_defs, self.func_ir, v) for v in index_list] + stencil_ir, self.func_ir, v) for v in index_list] if index_offsets: index_list = self._add_index_offsets(index_list, list(index_offsets), new_body, scope, loc) @@ -682,7 +683,7 @@ def __init__(self, typingctx, targetctx, args, f_ir): self.calltypes = None -def _get_const_index_expr(kernel_defs, func_ir, index_var): +def _get_const_index_expr(stencil_ir, func_ir, index_var): """ infer index_var as constant if it is of the form -c where c is a constant in the outer function. 
index_var is assumed to be inside stencil kernel @@ -693,17 +694,16 @@ def _get_const_index_expr(kernel_defs, func_ir, index_var): var_const = guard(ir_utils.find_const, func_ir, index_var) if var_const is not None: return var_const - # match definition inner_var = unary(index_var) - var_def_list = kernel_defs[index_var.name] - require(len(var_def_list) == 1) - var_def = var_def_list[0] - require(isinstance(var_def, ir.Expr) and var_def.op == 'unary') - inner_var = var_def.value + # get index definition + index_def = ir_utils.get_definition(stencil_ir, index_var) + # match inner_var = unary(index_var) + require(isinstance(index_def, ir.Expr) and index_def.op == 'unary') + inner_var = index_def.value # return -c as constant const_val = ir_utils.find_const(func_ir, inner_var) - if var_def.fn == '+': + if index_def.fn == '+': return const_val - elif var_def.fn == '-': + elif index_def.fn == '-': return -const_val except ir_utils.GuardException: return index_var From d5cf119b759def43daf86867aca15d98b94996b2 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Tue, 7 Aug 2018 10:46:42 -0400 Subject: [PATCH 07/14] support binary ops in stencil const index matching --- numba/stencilparfor.py | 55 ++++++++++++++++++++++++++++-------- numba/tests/test_stencils.py | 19 ++++++++++--- 2 files changed, 59 insertions(+), 15 deletions(-) diff --git a/numba/stencilparfor.py b/numba/stencilparfor.py index f9088e964ad..caceffe07b6 100644 --- a/numba/stencilparfor.py +++ b/numba/stencilparfor.py @@ -17,7 +17,7 @@ from numba import ir_utils, ir, utils, config, typing from numba.ir_utils import (get_call_table, mk_unique_var, compile_to_numba_ir, replace_arg_nodes, guard, - find_callname, require) + find_callname, require, find_const, GuardException) from numba.six import exec_ @@ -691,20 +691,53 @@ def _get_const_index_expr(stencil_ir, func_ir, index_var): try: require(isinstance(index_var, ir.Var)) # case where the index is a const itself in outer function - var_const = 
guard(ir_utils.find_const, func_ir, index_var) + var_const = guard(_get_const_two_irs, stencil_ir, func_ir, index_var) if var_const is not None: return var_const # get index definition index_def = ir_utils.get_definition(stencil_ir, index_var) # match inner_var = unary(index_var) - require(isinstance(index_def, ir.Expr) and index_def.op == 'unary') - inner_var = index_def.value - # return -c as constant - const_val = ir_utils.find_const(func_ir, inner_var) - if index_def.fn == '+': - return const_val - elif index_def.fn == '-': - return -const_val - except ir_utils.GuardException: + var_const = guard( + _get_const_unary_expr, stencil_ir, func_ir, index_def) + if var_const is not None: + return var_const + # match inner_var = arg1 + arg2 + var_const = guard( + _get_const_binary_expr, stencil_ir, func_ir, index_def) + if var_const is not None: + return var_const + + except GuardException: return index_var return index_var + +def _get_const_two_irs(ir1, ir2, var): + """get constant in either of two IRs if available + otherwise, throw GuardException + """ + var_const = guard(find_const, ir1, var) + if var_const is not None: + return var_const + var_const = guard(find_const, ir2, var) + if var_const is not None: + return var_const + raise GuardException + +def _get_const_unary_expr(stencil_ir, func_ir, index_def): + """evaluate constant unary expr if possible + otherwise, raise GuardException + """ + require(isinstance(index_def, ir.Expr) and index_def.op == 'unary') + inner_var = index_def.value + # return -c as constant + const_val = _get_const_two_irs(stencil_ir, func_ir, inner_var) + return eval("{}{}".format(index_def.fn, const_val)) + +def _get_const_binary_expr(stencil_ir, func_ir, index_def): + """evaluate constant binary expr if possible + otherwise, raise GuardException + """ + require(isinstance(index_def, ir.Expr) and index_def.op == 'binop') + arg1 = _get_const_two_irs(stencil_ir, func_ir, index_def.lhs) + arg2 = _get_const_two_irs(stencil_ir, func_ir, 
index_def.rhs) + return eval("{}{}{}".format(arg1, index_def.fn, arg2)) diff --git a/numba/tests/test_stencils.py b/numba/tests/test_stencils.py index 3b68dce7108..b1aa545dcc5 100644 --- a/numba/tests/test_stencils.py +++ b/numba/tests/test_stencils.py @@ -434,7 +434,7 @@ def test_stencil_call_const(self): constant from a unary expr. Otherwise, this would raise an error since neighborhood length is not specified. """ - def test_impl(n): + def test_impl1(n): A = np.arange(n) B = np.zeros(n) c = 1 @@ -442,6 +442,14 @@ def test_impl(n): A, c, out=B) return B + def test_impl2(n): + A = np.arange(n) + B = np.zeros(n) + c = 2 + numba.stencil(lambda a,c : 0.3 * (a[1-c] + a[0] + a[c-1]))( + A, c, out=B) + return B + def test_impl_seq(n): A = np.arange(n) B = np.zeros(n) @@ -452,11 +460,14 @@ def test_impl_seq(n): n = 100 # constant inference is only possible in parallel path - cpfunc = self.compile_parallel(test_impl, (types.intp,)) + cpfunc1 = self.compile_parallel(test_impl1, (types.intp,)) + cpfunc2 = self.compile_parallel(test_impl2, (types.intp,)) expected = test_impl_seq(n) # parfor result - parfor_output = cpfunc.entry_point(n) - np.testing.assert_almost_equal(parfor_output, expected, decimal=3) + parfor_output1 = cpfunc1.entry_point(n) + parfor_output2 = cpfunc2.entry_point(n) + np.testing.assert_almost_equal(parfor_output1, expected, decimal=3) + np.testing.assert_almost_equal(parfor_output2, expected, decimal=3) @skip_unsupported @tag('important') From bce1a85213557d92f2890f508782e0d2b9ae8e7f Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Tue, 14 Aug 2018 13:18:41 -0400 Subject: [PATCH 08/14] infer stencil index consts recursively --- numba/stencilparfor.py | 59 +++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 26 deletions(-) diff --git a/numba/stencilparfor.py b/numba/stencilparfor.py index caceffe07b6..5fb5ff42a7d 100644 --- a/numba/stencilparfor.py +++ b/numba/stencilparfor.py @@ -685,32 +685,39 @@ def __init__(self, typingctx, 
targetctx, args, f_ir): def _get_const_index_expr(stencil_ir, func_ir, index_var): """ - infer index_var as constant if it is of the form -c where c is a constant - in the outer function. index_var is assumed to be inside stencil kernel + infer index_var as constant if it is of an expression form like c-1 where c + is a constant in the outer function. + index_var is assumed to be inside stencil kernel """ - try: - require(isinstance(index_var, ir.Var)) - # case where the index is a const itself in outer function - var_const = guard(_get_const_two_irs, stencil_ir, func_ir, index_var) - if var_const is not None: - return var_const - # get index definition - index_def = ir_utils.get_definition(stencil_ir, index_var) - # match inner_var = unary(index_var) - var_const = guard( - _get_const_unary_expr, stencil_ir, func_ir, index_def) - if var_const is not None: - return var_const - # match inner_var = arg1 + arg2 - var_const = guard( - _get_const_binary_expr, stencil_ir, func_ir, index_def) - if var_const is not None: - return var_const - - except GuardException: - return index_var + const_val = guard( + _get_const_index_expr_inner, stencil_ir, func_ir, index_var) + if const_val is not None: + return const_val return index_var +def _get_const_index_expr_inner(stencil_ir, func_ir, index_var): + """inner constant inference function that calls constant, unary and binary + cases.
+ """ + require(isinstance(index_var, ir.Var)) + # case where the index is a const itself in outer function + var_const = guard(_get_const_two_irs, stencil_ir, func_ir, index_var) + if var_const is not None: + return var_const + # get index definition + index_def = ir_utils.get_definition(stencil_ir, index_var) + # match inner_var = unary(index_var) + var_const = guard( + _get_const_unary_expr, stencil_ir, func_ir, index_def) + if var_const is not None: + return var_const + # match inner_var = arg1 + arg2 + var_const = guard( + _get_const_binary_expr, stencil_ir, func_ir, index_def) + if var_const is not None: + return var_const + raise GuardException + def _get_const_two_irs(ir1, ir2, var): """get constant in either of two IRs if available otherwise, throw GuardException @@ -730,7 +737,7 @@ def _get_const_unary_expr(stencil_ir, func_ir, index_def): require(isinstance(index_def, ir.Expr) and index_def.op == 'unary') inner_var = index_def.value # return -c as constant - const_val = _get_const_two_irs(stencil_ir, func_ir, inner_var) + const_val = _get_const_index_expr_inner(stencil_ir, func_ir, inner_var) return eval("{}{}".format(index_def.fn, const_val)) def _get_const_binary_expr(stencil_ir, func_ir, index_def): @@ -738,6 +745,6 @@ def _get_const_binary_expr(stencil_ir, func_ir, index_def): otherwise, raise GuardException """ require(isinstance(index_def, ir.Expr) and index_def.op == 'binop') - arg1 = _get_const_two_irs(stencil_ir, func_ir, index_def.lhs) - arg2 = _get_const_two_irs(stencil_ir, func_ir, index_def.rhs) + arg1 = _get_const_index_expr_inner(stencil_ir, func_ir, index_def.lhs) + arg2 = _get_const_index_expr_inner(stencil_ir, func_ir, index_def.rhs) return eval("{}{}{}".format(arg1, index_def.fn, arg2)) From b2601510b9c2bf8134e5adea0194730ab3bc7e61 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Tue, 14 Aug 2018 13:18:59 -0400 Subject: [PATCH 09/14] add test for recursive stencil const inference --- numba/tests/test_stencils.py | 11 +++++++++++ 1 
file changed, 11 insertions(+) diff --git a/numba/tests/test_stencils.py b/numba/tests/test_stencils.py index b1aa545dcc5..f79df670492 100644 --- a/numba/tests/test_stencils.py +++ b/numba/tests/test_stencils.py @@ -450,6 +450,14 @@ def test_impl2(n): A, c, out=B) return B + def test_impl3(n): + A = np.arange(n) + B = np.zeros(n) + c = 2 + numba.stencil(lambda a,c : 0.3 * (a[-c+1] + a[0] + a[c-1]))( + A, c, out=B) + return B + def test_impl_seq(n): A = np.arange(n) B = np.zeros(n) @@ -462,12 +470,15 @@ def test_impl_seq(n): # constant inference is only possible in parallel path cpfunc1 = self.compile_parallel(test_impl1, (types.intp,)) cpfunc2 = self.compile_parallel(test_impl2, (types.intp,)) + cpfunc3 = self.compile_parallel(test_impl3, (types.intp,)) expected = test_impl_seq(n) # parfor result parfor_output1 = cpfunc1.entry_point(n) parfor_output2 = cpfunc2.entry_point(n) + parfor_output3 = cpfunc3.entry_point(n) np.testing.assert_almost_equal(parfor_output1, expected, decimal=3) np.testing.assert_almost_equal(parfor_output2, expected, decimal=3) + np.testing.assert_almost_equal(parfor_output3, expected, decimal=3) @skip_unsupported @tag('important') From 5c85414cbd37ec0f2e6753ab50b073c55e0a3b17 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Tue, 14 Aug 2018 13:23:16 -0400 Subject: [PATCH 10/14] add test for multiple const stencil inference --- numba/tests/test_stencils.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/numba/tests/test_stencils.py b/numba/tests/test_stencils.py index f79df670492..6b072377655 100644 --- a/numba/tests/test_stencils.py +++ b/numba/tests/test_stencils.py @@ -450,6 +450,7 @@ def test_impl2(n): A, c, out=B) return B + # recursive expr case def test_impl3(n): A = np.arange(n) B = np.zeros(n) @@ -458,6 +459,16 @@ def test_impl3(n): A, c, out=B) return B + # multi-constant case + def test_impl4(n): + A = np.arange(n) + B = np.zeros(n) + d = 1 + c = 2 + numba.stencil(lambda a,c,d : 0.3 * (a[-c+d] + a[0] + a[c-d]))( +
A, c, d, out=B) + return B + def test_impl_seq(n): A = np.arange(n) B = np.zeros(n) @@ -471,14 +482,17 @@ def test_impl_seq(n): cpfunc1 = self.compile_parallel(test_impl1, (types.intp,)) cpfunc2 = self.compile_parallel(test_impl2, (types.intp,)) cpfunc3 = self.compile_parallel(test_impl3, (types.intp,)) + cpfunc4 = self.compile_parallel(test_impl4, (types.intp,)) expected = test_impl_seq(n) # parfor result parfor_output1 = cpfunc1.entry_point(n) parfor_output2 = cpfunc2.entry_point(n) parfor_output3 = cpfunc3.entry_point(n) + parfor_output4 = cpfunc4.entry_point(n) np.testing.assert_almost_equal(parfor_output1, expected, decimal=3) np.testing.assert_almost_equal(parfor_output2, expected, decimal=3) np.testing.assert_almost_equal(parfor_output3, expected, decimal=3) + np.testing.assert_almost_equal(parfor_output4, expected, decimal=3) @skip_unsupported @tag('important') From 5ec88622f0b313438c61910032b105dc16763a46 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Tue, 14 Aug 2018 13:43:18 -0400 Subject: [PATCH 11/14] docs for stencil const inference --- docs/source/user/stencil.rst | 38 ++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/docs/source/user/stencil.rst b/docs/source/user/stencil.rst index 2a32601148f..400aef04b40 100644 --- a/docs/source/user/stencil.rst +++ b/docs/source/user/stencil.rst @@ -7,7 +7,7 @@ Using the ``@stencil`` decorator ================================ -Stencils are a common computational pattern in which array elements +Stencils are a common computational pattern in which array elements are updated according to some fixed pattern called the stencil kernel. 
Numba provides the ``@stencil`` decorator so that users may easily specify a stencil kernel and Numba then generates the looping @@ -21,7 +21,7 @@ Basic usage =========== An example use of the ``@stencil`` decorator:: - + from numba import stencil @stencil @@ -38,8 +38,8 @@ Conceptually, the stencil kernel is run once for each element in the output array. The return value from the stencil kernel is the value written into the output array for that particular element. -The parameter ``a`` represents the input array over which the -kernel is applied. +The parameter ``a`` represents the input array over which the +kernel is applied. Indexing into this array takes place with respect to the current element of the output array being processed. For example, if element ``(x, y)`` is being processed then ``a[0, 0]`` in the stencil kernel corresponds to @@ -48,9 +48,9 @@ kernel corresponds to ``a[x - 1, y + 1]`` in the input array. Depending on the specified kernel, the kernel may not be applicable to the borders of the output array as this may cause the input array to be -accessed out-of-bounds. The way in which the stencil decorator handles -this situation is dependent upon which :ref:`stencil-mode` is selected. -The default mode is for the stencil decorator to set the border elements +accessed out-of-bounds. The way in which the stencil decorator handles +this situation is dependent upon which :ref:`stencil-mode` is selected. +The default mode is for the stencil decorator to set the border elements of the output array to zero. To invoke a stencil on an input array, call the stencil as if it were @@ -105,13 +105,13 @@ all such input array arguments. Kernel shape inference and border handling ========================================== -In the above example and in most cases, the array indexing in the +In the above example and in most cases, the array indexing in the stencil kernel will exclusively use ``Integer`` literals. 
In such cases, the stencil decorator is able to analyze the stencil kernel to determine its size. In the above example, the stencil decorator determines that the kernel is ``3 x 3`` in shape since indices ``-1`` to ``1`` are used for both the first and second dimensions. Note that -the stencil decorator also correctly handles non-symmetric and +the stencil decorator also correctly handles non-symmetric and non-square stencil kernels. Based on the size of the stencil kernel, the stencil decorator is @@ -122,11 +122,19 @@ of the output array. In the above example, points ``-1`` and ``+1`` are accessed in each dimension and thus the output array has a border of size one in all dimensions. +The parallel mode is able to infer kernel indices as constants from +simple expressions if possible. For example:: + + @njit(parallel=True) + def stencil_test(A): + c = 2 + return stencil(lambda a, c, d2: 0.3 * (a[-c+1] + a[0] + a[c-1]))(A, c) + Stencil decorator options ========================= -While the stencil decorator may be augmented in the future to +While the stencil decorator may be augmented in the future to provide additional mechanisms for border handling, at the moment the stencil decorator currently supports only one option. @@ -138,7 +146,7 @@ the stencil decorator currently supports only one option. Sometimes it may be inconvenient to write the stencil kernel exclusively with ``Integer`` literals. For example, let us say we would like to compute the trailing 30-day moving average of a -time series of data. One could write +time series of data. One could write ``(a[-29] + a[-28] + ... + a[-1] + a[0]) / 30`` but the stencil decorator offers a more concise form using the ``neighborhood`` option:: @@ -176,7 +184,7 @@ to a constant value, as specified by the ``cval`` parameter. The optional cval parameter defaults to zero but can be set to any desired value, which is then used for the border of the output array -if the mode parameter is set to ``constant``. 
The cval parameter is +if the mode parameter is set to ``constant``. The cval parameter is ignored in all other modes. The type of the cval parameter must match the return type of the stencil kernel. If the user wishes the output array to be constructed from a particular type then they should ensure @@ -206,7 +214,7 @@ The stencil decorator returns a callable object of type ``StencilFunc``. ``StencilFunc`` objects contains a number of attributes but the only one of potential interest to users is the ``neighborhood`` attribute. If the ``neighborhood`` option was passed to the stencil decorator then -the provided neighborhood is stored in this attribute. Else, upon +the provided neighborhood is stored in this attribute. Else, upon first execution or compilation, the system calculates the neighborhood as described above and then stores the computed neighborhood into this attribute. A user may then inspect the attribute if they wish to verify @@ -226,8 +234,8 @@ also include the following optional parameter. ------- The optional ``out`` parameter is added to every stencil function -generated by Numba. If specified, the ``out`` parameter tells -Numba that the user is providing their own pre-allocated array +generated by Numba. If specified, the ``out`` parameter tells +Numba that the user is providing their own pre-allocated array to be used for the output of the stencil. In this case, the stencil function will not allocate its own output array. 
Users should assure that the return type of the stencil kernel can From e3f2bdab213a16cb6b6ae4cc6ad3f8b1866b0824 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Thu, 16 Aug 2018 11:44:55 -0400 Subject: [PATCH 12/14] fix stencil const doc code --- docs/source/user/stencil.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/source/user/stencil.rst b/docs/source/user/stencil.rst index 400aef04b40..49154143922 100644 --- a/docs/source/user/stencil.rst +++ b/docs/source/user/stencil.rst @@ -127,8 +127,10 @@ simple expressions if possible. For example:: @njit(parallel=True) def stencil_test(A): - c = 2 - return stencil(lambda a, c, d2: 0.3 * (a[-c+1] + a[0] + a[c-1]))(A, c) + c = 2 + B = stencil( + lambda a, c: 0.3 * (a[-c+1] + a[0] + a[c-1]))(A, c) + return B Stencil decorator options From 4d29555b88845ccefb0a9c17089d95badc7d54eb Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Thu, 16 Aug 2018 14:14:27 -0400 Subject: [PATCH 13/14] improve stencil kernel constant index error and check error in non-parallel paths --- numba/stencil.py | 7 ++++--- numba/tests/test_stencils.py | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/numba/stencil.py b/numba/stencil.py index dc7dc32e37e..75af60ae304 100644 --- a/numba/stencil.py +++ b/numba/stencil.py @@ -182,8 +182,8 @@ def add_indices_to_kernel(self, kernel, index_names, ndim, elif stmt_index_var.name in const_dict: kernel_consts += [const_dict[stmt_index_var.name]] else: - raise ValueError("Non-constant specified for " - "stencil kernel index.") + raise ValueError("stencil kernel index is not " + "constant, 'neighborhood' option required") if ndim == 1: # Single dimension always has index variable 'index0'. 
@@ -261,7 +261,8 @@ def add_indices_to_kernel(self, kernel, index_names, ndim, neighborhood[i][1] = max(neighborhood[i][1], te) else: raise ValueError( - "Non-constant used as stencil index.") + "stencil kernel index is not constant, " + "'neighborhood' option required") index_len = len(index) elif isinstance(index, int): neighborhood[0][0] = min(neighborhood[0][0], index) diff --git a/numba/tests/test_stencils.py b/numba/tests/test_stencils.py index 6b072377655..af387f50f85 100644 --- a/numba/tests/test_stencils.py +++ b/numba/tests/test_stencils.py @@ -494,6 +494,20 @@ def test_impl_seq(n): np.testing.assert_almost_equal(parfor_output3, expected, decimal=3) np.testing.assert_almost_equal(parfor_output4, expected, decimal=3) + # check error in regular Python path + with self.assertRaises(ValueError) as e: + test_impl4(4) + + self.assertIn("stencil kernel index is not constant, " + "'neighborhood' option required", str(e.exception)) + # check error in njit path + # TODO: ValueError should be thrown instead of LoweringError + with self.assertRaises(LoweringError) as e: + njit(test_impl4)(4) + + self.assertIn("stencil kernel index is not constant, " + "'neighborhood' option required", str(e.exception)) + @skip_unsupported @tag('important') def test_stencil_parallel_off(self): From 1268eaf99c715539e0ec4cca07fa937521b75102 Mon Sep 17 00:00:00 2001 From: Ehsan Totoni Date: Thu, 16 Aug 2018 16:13:39 -0400 Subject: [PATCH 14/14] fix circular import for py2 --- numba/ir_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/numba/ir_utils.py b/numba/ir_utils.py index 7b50a6153d1..5123902929c 100644 --- a/numba/ir_utils.py +++ b/numba/ir_utils.py @@ -11,8 +11,7 @@ import numba from numba.six import exec_ -from numba import (ir, types, typing, config, analysis, utils, cgutils, - rewrites, postproc) +from numba import ir, types, typing, config, analysis, utils, cgutils, rewrites from numba.typing.templates import signature, infer_global,
AbstractTemplate from numba.targets.imputils import impl_ret_untracked from numba.analysis import (compute_live_map, compute_use_defs, @@ -1574,6 +1573,7 @@ def __init__(self, f_ir): inline_pass = numba.inline_closurecall.InlineClosureCallPass( ir, numba.targets.cpu.ParallelOptions(False)) inline_pass.run() + from numba import postproc post_proc = postproc.PostProcessor(ir) post_proc.run() return ir