Skip to content

Commit

Permalink
Merge pull request #6335 from stuartarchibald/wip/enable_more_vectori…
Browse files Browse the repository at this point in the history
…zation_1

Split optimisation passes.
  • Loading branch information
stuartarchibald committed Oct 13, 2020
2 parents 0ac1147 + 449d0f5 commit 790373e
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 8 deletions.
26 changes: 18 additions & 8 deletions numba/core/codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,9 +545,14 @@ def _optimize_final_module(self):
"""
Internal: optimize this library's final module.
"""
self._codegen._mpm.run(self._final_module)
# A cheaper optimisation pass is run first to try and get as many
# refops into the same function as possible via inlining
self._codegen._mpm_cheap.run(self._final_module)
# Refop pruning is then run on the heavily inlined function
if not config.LLVM_REFPRUNE_PASS:
self._final_module = remove_redundant_nrt_refct(self._final_module)
# The full optimisation suite is then run on the refop pruned IR
self._codegen._mpm_full.run(self._final_module)

def _get_module_for_linking(self):
"""
Expand Down Expand Up @@ -1064,7 +1069,9 @@ def _init(self, llvm_module):
self._engine = JitEngine(engine)
self._target_data = engine.target_data
self._data_layout = str(self._target_data)
self._mpm = self._module_pass_manager()
self._mpm_cheap = self._module_pass_manager(loop_vectorize=False,
opt=1)
self._mpm_full = self._module_pass_manager()

self._engine.set_object_cache(self._library_class._object_compiled_hook,
self._library_class._object_getbuffer_hook)
Expand Down Expand Up @@ -1093,25 +1100,25 @@ def create_library(self, name):
def unserialize_library(self, serialized):
    """
    Rebuild a library from *serialized*.

    Delegates to the ``_unserialize`` hook of ``self._library_class``,
    passing this object (``self``) and the serialized payload, and returns
    whatever that hook returns.
    """
    return self._library_class._unserialize(self, serialized)

def _module_pass_manager(self):
def _module_pass_manager(self, **kwargs):
pm = ll.create_module_pass_manager()
self._tm.add_analysis_passes(pm)
with self._pass_manager_builder() as pmb:
with self._pass_manager_builder(**kwargs) as pmb:
pmb.populate(pm)
if config.LLVM_REFPRUNE_PASS:
pm.add_refprune_pass(_parse_refprune_flags())
return pm

def _function_pass_manager(self, llvm_module):
def _function_pass_manager(self, llvm_module, **kwargs):
pm = ll.create_function_pass_manager(llvm_module)
self._tm.add_analysis_passes(pm)
with self._pass_manager_builder() as pmb:
with self._pass_manager_builder(**kwargs) as pmb:
pmb.populate(pm)
if config.LLVM_REFPRUNE_PASS:
pm.add_refprune_pass(_parse_refprune_flags())
return pm

def _pass_manager_builder(self):
def _pass_manager_builder(self, **kwargs):
"""
Create a PassManagerBuilder.
Expand All @@ -1120,8 +1127,11 @@ def _pass_manager_builder(self):
or function pass manager. Otherwise some optimizations will be
missed...
"""
opt_level = kwargs.pop('opt', config.OPT)
loop_vectorize = kwargs.pop('loop_vectorize', config.LOOP_VECTORIZE)

pmb = lp.create_pass_manager_builder(
opt=config.OPT, loop_vectorize=config.LOOP_VECTORIZE)
opt=opt_level, loop_vectorize=loop_vectorize, **kwargs)
return pmb

def _check_llvm_bugs(self):
Expand Down
36 changes: 36 additions & 0 deletions numba/tests/test_vectorization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import numpy as np
from numba import njit, types
from unittest import TestCase
from numba.tests.support import override_env_config

# Developer switch: set to True to turn on the LLVM option below while
# running these tests.
_DEBUG = False
if _DEBUG:
    from llvmlite import binding as llvm
    # Prints debug info from LLVM's loop vectorizer
    llvm.set_option("", "--debug-only=loop-vectorize")


class TestVectorization(TestCase):
    """
    Checks that code which is expected to vectorize really does end up
    vectorized in the generated LLVM IR.
    """

    def gen_ir(self, func, args_tuple, **flags):
        """
        Compile *func* for the argument types in *args_tuple* using
        ``njit(**flags)`` and return the LLVM IR of the compiled library as
        a string.

        Compilation happens with ``NUMBA_CPU_NAME`` overridden to
        ``skylake-avx512`` and ``NUMBA_CPU_FEATURES`` overridden to an empty
        string (presumably so the IR does not depend on the host CPU).
        """
        with override_env_config("NUMBA_CPU_NAME", "skylake-avx512"):
            with override_env_config("NUMBA_CPU_FEATURES", ""):
                dispatcher = njit(**flags)(func)
                dispatcher.compile(args_tuple)
                # Only one signature was compiled, so take the first one.
                first_sig = dispatcher.signatures[0]
                overload = dispatcher.overloads[first_sig]
                return overload.library.get_llvm_str()

    def test_nditer_loop(self):
        # see https://github.com/numba/numba/issues/5033
        # The kernel below is what gets compiled; its body is left as-is.
        def do_sum(x):
            acc = 0
            for v in np.nditer(x):
                acc += v.item()
            return acc

        ir_text = self.gen_ir(do_sum, (types.float64[::1],), fastmath=True)
        # Both strings must be present in the IR for the loop to count as
        # vectorized.
        for marker in ("vector.body", "llvm.loop.isvectorized"):
            self.assertIn(marker, ir_text)

0 comments on commit 790373e

Please sign in to comment.