diff --git a/docs/source/reference/envvars.rst b/docs/source/reference/envvars.rst
index 8b0645ea3c0..63f9be65096 100644
--- a/docs/source/reference/envvars.rst
+++ b/docs/source/reference/envvars.rst
@@ -35,6 +35,23 @@ for permanent use by adding::
 
     color_scheme: dark_bg
 
+Jit flags
+---------
+
+These variables globally override flags to the :func:`~numba.jit` decorator.
+
+.. envvar:: NUMBA_BOUNDSCHECK
+
+   If set to 0 or 1, globally disable or enable bounds checking, respectively.
+   The default if the variable is not set or set to an empty string is to use
+   the ``boundscheck`` flag passed to the :func:`~numba.jit` decorator for a
+   given function. See the documentation of :ref:`@jit
+   <jit-decorator-boundscheck>` for more information.
+
+   Note: due to limitations in Numba, bounds checking currently produces
+   exception messages that do not match those from NumPy. If you set
+   ``NUMBA_FULL_TRACEBACKS=1``, a debug message with the index, axis, and
+   axis size is also printed to the terminal when a check fails.
 
 Debugging
 ---------
@@ -363,4 +380,3 @@ Threading Control
    * ``tbb`` - A threading layer backed by Intel TBB.
    * ``omp`` - A threading layer backed by OpenMP.
    * ``workqueue`` - A simple built-in work-sharing task scheduler.
-
diff --git a/docs/source/reference/jit-compilation.rst b/docs/source/reference/jit-compilation.rst
index d037b78a642..7ccddc639fc 100644
--- a/docs/source/reference/jit-compilation.rst
+++ b/docs/source/reference/jit-compilation.rst
@@ -7,7 +7,7 @@ JIT functions
 
 .. _jit-decorator:
 
-.. decorator:: numba.jit(signature=None, nopython=False, nogil=False, cache=False, forceobj=False, parallel=False, error_model='python', fastmath=False, locals={})
+.. decorator:: numba.jit(signature=None, nopython=False, nogil=False, cache=False, forceobj=False, parallel=False, error_model='python', fastmath=False, locals={}, boundscheck=False)
 
    Compile the decorated function on-the-fly to produce efficient machine
    code.  All parameters are optional.
@@ -74,7 +74,7 @@ JIT functions
    .. _jit-decorator-parallel:
 
    If true, *parallel* enables the automatic parallelization of a number of
-   common Numpy constructs as well as the fusion of adjacent parallel 
+   common Numpy constructs as well as the fusion of adjacent parallel
    operations to maximize cache locality.
 
    The *error_model* option controls the divide-by-zero behavior.
@@ -95,6 +95,16 @@ JIT functions
    accurate versions of some math intrinsics are used (answers to within
    ``4 ULP``).
 
+   .. _jit-decorator-boundscheck:
+
+   If true, *boundscheck* enables bounds checking for array indices.
+   Out-of-bounds accesses will raise ``IndexError``. The default is to not do
+   bounds checking. If bounds checking is disabled, out-of-bounds accesses can
+   produce garbage results or segfaults. However, enabling bounds checking
+   will slow down typical functions, so it is recommended to only use this
+   flag for debugging. You can also set the :envvar:`NUMBA_BOUNDSCHECK`
+   environment variable to 0 or 1 to globally override this flag.
+
    The *locals* dictionary may be used to force the :ref:`numba-types`
    of particular local variables, for example if you want to force the
    use of single precision floats at some point.  In general, we recommend
diff --git a/numba/cgutils.py b/numba/cgutils.py
index 0056312e987..cdce2513163 100644
--- a/numba/cgutils.py
+++ b/numba/cgutils.py
@@ -657,16 +657,53 @@ def unpack_tuple(builder, tup, count=None):
     return vals
 
 
-def get_item_pointer(builder, aryty, ary, inds, wraparound=False):
+def get_item_pointer(context, builder, aryty, ary, inds, wraparound=False,
+                     boundscheck=False):
+    # Pass boundscheck=True (typically context.enable_boundscheck) for any
+    # pointer access that should be bounds checked; the flag is forwarded
+    # unchanged to get_item_pointer2().
     shapes = unpack_tuple(builder, ary.shape, count=aryty.ndim)
     strides = unpack_tuple(builder, ary.strides, count=aryty.ndim)
-    return get_item_pointer2(builder, data=ary.data, shape=shapes,
+    return get_item_pointer2(context, builder, data=ary.data, shape=shapes,
                              strides=strides, layout=aryty.layout, inds=inds,
-                             wraparound=wraparound)
+                             wraparound=wraparound, boundscheck=boundscheck)
 
 
-def get_item_pointer2(builder, data, shape, strides, layout, inds,
-                      wraparound=False):
+def do_boundscheck(context, builder, ind, dimlen, axis=None):
+    """Emit code raising IndexError unless 0 <= ind < dimlen.
+
+    axis (Python int, IR value or None) is only used in the debug printout.
+    """
+    def _dbg():
+        # Remove this when we figure out how to include this information
+        # in the error message.
+        fmt = "debug: IndexError: index %d is out of bounds for "
+        if axis is None:
+            printf(builder, fmt + "size %d\n", ind, dimlen)
+        elif isinstance(axis, int):
+            printf(builder, fmt + "axis {} with size %d\n".format(axis),
+                   ind, dimlen)
+        else:
+            printf(builder, fmt + "axis %d with size %d\n", ind, axis, dimlen)
+
+    msg = "index is out of bounds"
+    out_of_bounds_upper = builder.icmp_signed('>=', ind, dimlen)
+    with if_unlikely(builder, out_of_bounds_upper):
+        if config.FULL_TRACEBACKS:
+            _dbg()
+        context.call_conv.return_user_exc(builder, IndexError, (msg,))
+    out_of_bounds_lower = builder.icmp_signed('<', ind, ind.type(0))
+    with if_unlikely(builder, out_of_bounds_lower):
+        if config.FULL_TRACEBACKS:
+            _dbg()
+        context.call_conv.return_user_exc(builder, IndexError, (msg,))
+
+
+def get_item_pointer2(context, builder, data, shape, strides, layout, inds,
+                      wraparound=False, boundscheck=False):
+    # When boundscheck is true, do_boundscheck() is emitted for every index.
+    # Callers decide whether checking is enabled, typically by passing
+    # boundscheck=context.enable_boundscheck.
     if wraparound:
         # Wraparound
         indices = []
@@ -677,6 +714,10 @@ def get_item_pointer2(builder, data, shape, strides, layout, inds,
             indices.append(selected)
     else:
         indices = inds
+    if boundscheck:
+        for axis, (ind, dimlen) in enumerate(zip(indices, shape)):
+            do_boundscheck(context, builder, ind, dimlen, axis)
+
     if not indices:
         # Indexing with empty tuple
         return builder.gep(data, [int32_t(0)])
diff --git a/numba/compiler.py b/numba/compiler.py
index ea36f0bef2f..3f021b4e294 100644
--- a/numba/compiler.py
+++ b/numba/compiler.py
@@ -47,7 +47,7 @@ class Flags(utils.ConfigOptions):
         'release_gil': False,
         'no_compile': False,
         'debuginfo': False,
-        'boundcheck': False,
+        'boundscheck': False,
         'forceinline': False,
         'no_cpython_wrapper': False,
         # Enable automatic parallel optimization, can be fine-tuned by taking
@@ -251,8 +251,8 @@ def _make_subtarget(targetctx, flags):
     subtargetoptions = {}
     if flags.debuginfo:
         subtargetoptions['enable_debuginfo'] = True
-    if flags.boundcheck:
-        subtargetoptions['enable_boundcheck'] = True
+    if flags.boundscheck:
+        subtargetoptions['enable_boundscheck'] = True
     if flags.nrt:
         subtargetoptions['enable_nrt'] = True
     if flags.auto_parallel:
@@ -422,7 +422,6 @@ class DefaultPassBuilder(object):
       - objectmode
       - interpreted
     """
-
     @staticmethod
     def define_nopython_pipeline(state, name='nopython'):
         """Returns an nopython mode pipeline based PassManager
diff --git a/numba/config.py b/numba/config.py
index 61a4435faa5..d21d696bf00 100644
--- a/numba/config.py
+++ b/numba/config.py
@@ -144,6 +144,11 @@ def optional_str(x):
         # just bold fonts in use.
         COLOR_SCHEME = _readenv("NUMBA_COLOR_SCHEME", str, "no_color")
 
+        # Whether to globally enable bounds checking. The default None means
+        # to use the value of the flag to @njit. 0 or 1 overrides the flag
+        # globally.
+        BOUNDSCHECK = _readenv("NUMBA_BOUNDSCHECK", int, None)
+
         # Debug flag to control compiler debug print
         DEBUG = _readenv("NUMBA_DEBUG", int, 0)
 
diff --git a/numba/cuda/compiler.py b/numba/cuda/compiler.py
index d242e11b5d3..fa65b2e1aa2 100644
--- a/numba/cuda/compiler.py
+++ b/numba/cuda/compiler.py
@@ -37,7 +37,6 @@ def compile_cuda(pyfunc, return_type, args, debug, inline):
     flags.set('no_compile')
     flags.set('no_cpython_wrapper')
     if debug:
-        flags.set('boundcheck')
         flags.set('debuginfo')
     if inline:
         flags.set('forceinline')
diff --git a/numba/cuda/cudaimpl.py b/numba/cuda/cudaimpl.py
index c979f2f5d83..38742f0afb3 100644
--- a/numba/cuda/cudaimpl.py
+++ b/numba/cuda/cudaimpl.py
@@ -536,7 +536,7 @@ def imp(context, builder, sig, args):
                             (aryty.ndim, len(indty)))
 
         lary = context.make_array(aryty)(context, builder, ary)
-        ptr = cgutils.get_item_pointer(builder, aryty, lary, indices)
+        ptr = cgutils.get_item_pointer(context, builder, aryty, lary, indices)
         # dispatcher to implementation base on dtype
         return dispatch_fn(context, builder, dtype, ptr, val)
     return imp
@@ -607,7 +607,7 @@ def ptx_atomic_cas_tuple(context, builder, sig, args):
 
     lary = context.make_array(aryty)(context, builder, ary)
     zero = context.get_constant(types.intp, 0)
-    ptr = cgutils.get_item_pointer(builder, aryty, lary, (zero,))
+    ptr = cgutils.get_item_pointer(context, builder, aryty, lary, (zero,))
     if aryty.dtype == types.int32:
         lmod = builder.module
         return builder.call(nvvmutils.declare_atomic_cas_int32(lmod),
diff --git a/numba/cuda/decorators.py b/numba/cuda/decorators.py
index 56e3ff4e105..d5cecef5bfa 100644
--- a/numba/cuda/decorators.py
+++ b/numba/cuda/decorators.py
@@ -52,6 +52,9 @@ def jit(func_or_sig=None, argtypes=None, device=False, inline=False, bind=True,
     if link and config.ENABLE_CUDASIM:
         raise NotImplementedError('Cannot link PTX in the simulator')
 
+    if kws.pop('boundscheck', False):  # explicit boundscheck=False is fine
+        raise NotImplementedError("bounds checking is not supported for CUDA")
+
     fastmath = kws.get('fastmath', False)
     if argtypes is None and not sigutils.is_signature(func_or_sig):
         if func_or_sig is None:
@@ -126,4 +129,3 @@ def convert_types(restype, argtypes):
         argtypes, restype = sigutils.normalize_signature(restype)
 
     return restype, argtypes
-
diff --git a/numba/cuda/simulator/api.py b/numba/cuda/simulator/api.py
index e3adfbdc224..2c2d82334b4 100644
--- a/numba/cuda/simulator/api.py
+++ b/numba/cuda/simulator/api.py
@@ -73,7 +73,13 @@ def elapsed_time(self, event):
 
 
 def jit(func_or_sig=None, device=False, debug=False, argtypes=None,
-        inline=False, restype=None, fastmath=False, link=None):
+        inline=False, restype=None, fastmath=False, link=None,
+        boundscheck=None,
+        ):
+    # Accepted for API compatibility; only enabling it is unsupported
+    if boundscheck:
+        raise NotImplementedError("bounds checking is not supported for CUDA")
+
     if link is not None:
         raise NotImplementedError('Cannot link PTX in the simulator')
     # Check for first argument specifying types - in that case the
diff --git a/numba/cuda/target.py b/numba/cuda/target.py
index f475551aff9..9c70ae9e1ca 100644
--- a/numba/cuda/target.py
+++ b/numba/cuda/target.py
@@ -60,6 +60,11 @@ class CUDATargetContext(BaseContext):
     strict_alignment = True
     DIBuilder = debuginfo.NvvmDIBuilder
 
+    @property
+    def enable_boundscheck(self):
+        # Bounds checking is always reported as disabled for this target
+        return False
+
     # Overrides
     def create_module(self, name):
         return self._internal_codegen._create_empty_module(name)
diff --git a/numba/decorators.py b/numba/decorators.py
index d04743ac850..8d05ff826dc 100644
--- a/numba/decorators.py
+++ b/numba/decorators.py
@@ -39,7 +39,7 @@ def autojit(*args, **kws):
                                  "positional argument.")
 
 def jit(signature_or_function=None, locals={}, target='cpu', cache=False,
-        pipeline_class=None, **options):
+        pipeline_class=None, boundscheck=False, **options):
     """
     This decorator is used to compile a Python function into native code.
 
@@ -99,6 +99,16 @@ def jit(signature_or_function=None, locals={}, target='cpu', cache=False,
                 NOTE: This inlining is performed at the Numba IR level and is in
                 no way related to LLVM inlining.
 
+            boundscheck: bool
+                Set to True to enable bounds checking for array indices. Out
+                of bounds accesses will raise IndexError. The default is to
+                not do bounds checking. If bounds checking is disabled, out of
+                bounds accesses can produce garbage results or segfaults.
+                However, enabling bounds checking will slow down typical
+                functions, so it is recommended to only use this flag for
+                debugging. You can also set the NUMBA_BOUNDSCHECK environment
+                variable to 0 or 1 to globally override this flag.
+
     Returns
     --------
     A callable usable as a compiled function.  Actual compiling will be
@@ -149,6 +159,8 @@ def bar(x, y):
     if 'restype' in options:
         raise DeprecationError(_msg_deprecated_signature_arg.format('restype'))
 
+    options['boundscheck'] = boundscheck
+
     # Handle signature
     if signature_or_function is None:
         # No signature, no function
diff --git a/numba/npyufunc/ufuncbuilder.py b/numba/npyufunc/ufuncbuilder.py
index 218d405c64f..2ad35d460d0 100644
--- a/numba/npyufunc/ufuncbuilder.py
+++ b/numba/npyufunc/ufuncbuilder.py
@@ -24,6 +24,7 @@ class UFuncTargetOptions(TargetOptions):
     OPTIONS = {
         "nopython" : bool,
         "forceobj" : bool,
+        "boundscheck": bool,
         "fastmath" : FastMathOptions,
     }
 
diff --git a/numba/roc/hsaimpl.py b/numba/roc/hsaimpl.py
index 961ac0522c0..0e97a7b6188 100644
--- a/numba/roc/hsaimpl.py
+++ b/numba/roc/hsaimpl.py
@@ -232,7 +232,7 @@ def hsail_atomic_add_tuple(context, builder, sig, args):
                         (aryty.ndim, len(indty)))
 
     lary = context.make_array(aryty)(context, builder, ary)
-    ptr = cgutils.get_item_pointer(builder, aryty, lary, indices)
+    ptr = cgutils.get_item_pointer(context, builder, aryty, lary, indices)
 
     return builder.atomic_rmw("add", ptr, val, ordering='monotonic')
 
diff --git a/numba/targets/arraymath.py b/numba/targets/arraymath.py
index 391eb34db66..6f6b3207f6c 100644
--- a/numba/targets/arraymath.py
+++ b/numba/targets/arraymath.py
@@ -2977,7 +2977,7 @@ def array_nonzero(context, builder, sig, args):
     one = context.get_constant(types.intp, 1)
     count = cgutils.alloca_once_value(builder, zero)
     with cgutils.loop_nest(builder, shape, zero.type) as indices:
-        ptr = cgutils.get_item_pointer2(builder, data, shape, strides,
+        ptr = cgutils.get_item_pointer2(context, builder, data, shape, strides,
                                         layout, indices)
         val = load_item(context, builder, aryty, ptr)
         nz = context.is_true(builder, aryty.dtype, val)
@@ -2994,7 +2994,7 @@ def array_nonzero(context, builder, sig, args):
     # And fill them up
     index = cgutils.alloca_once_value(builder, zero)
     with cgutils.loop_nest(builder, shape, zero.type) as indices:
-        ptr = cgutils.get_item_pointer2(builder, data, shape, strides,
+        ptr = cgutils.get_item_pointer2(context, builder, data, shape, strides,
                                         layout, indices)
         val = load_item(context, builder, aryty, ptr)
         nz = context.is_true(builder, aryty.dtype, val)
@@ -3005,7 +3005,7 @@ def array_nonzero(context, builder, sig, args):
                 indices = (zero,)
             cur = builder.load(index)
             for i in range(nouts):
-                ptr = cgutils.get_item_pointer2(builder, out_datas[i],
+                ptr = cgutils.get_item_pointer2(context, builder, out_datas[i],
                                                 out_shape, (),
                                                 'C', [cur])
                 store_item(context, builder, outaryty, indices[i], ptr)
diff --git a/numba/targets/arrayobj.py b/numba/targets/arrayobj.py
index e8d1db21295..b43b78eb8be 100644
--- a/numba/targets/arrayobj.py
+++ b/numba/targets/arrayobj.py
@@ -279,7 +279,7 @@ def _getitem_array1d(context, builder, arrayty, array, idx, wraparound):
     """
     Look up and return an element from a 1D array.
     """
-    ptr = cgutils.get_item_pointer(builder, arrayty, array, [idx],
+    ptr = cgutils.get_item_pointer(context, builder, arrayty, array, [idx],
                                    wraparound=wraparound)
     return load_item(context, builder, arrayty, ptr)
 
@@ -315,7 +315,8 @@ def iternext_array(context, builder, sig, args, result):
 #-------------------------------------------------------------------------------
 # Basic indexing (with integers and slices only)
 
-def basic_indexing(context, builder, aryty, ary, index_types, indices):
+def basic_indexing(context, builder, aryty, ary, index_types, indices,
+                   boundscheck=None):
     """
     Perform basic indexing on the given array.
     A (data pointer, shapes, strides) tuple is returned describing
@@ -354,6 +355,8 @@ def basic_indexing(context, builder, aryty, ary, index_types, indices):
         elif isinstance(idxty, types.Integer):
             ind = fix_integer_index(context, builder, idxty, indexval,
                                     shapes[ax])
+            if boundscheck:
+                cgutils.do_boundscheck(context, builder, ind, shapes[ax], ax)
             output_indices.append(ind)
         else:
             raise NotImplementedError("unexpected index type: %s" % (idxty,))
@@ -368,9 +371,9 @@ def basic_indexing(context, builder, aryty, ary, index_types, indices):
 
     # No need to check wraparound, as negative indices were already
     # fixed in the loop above.
-    dataptr = cgutils.get_item_pointer(builder, aryty, ary,
+    dataptr = cgutils.get_item_pointer(context, builder, aryty, ary,
                                        output_indices,
-                                       wraparound=False)
+                                       wraparound=False, boundscheck=False)
     return (dataptr, output_shapes, output_strides)
 
 
@@ -397,7 +400,8 @@ def _getitem_array_generic(context, builder, return_type, aryty, ary,
     returning either a scalar or a view.
     """
     dataptr, view_shapes, view_strides = \
-        basic_indexing(context, builder, aryty, ary, index_types, indices)
+        basic_indexing(context, builder, aryty, ary, index_types, indices,
+                       boundscheck=context.enable_boundscheck)
 
     if isinstance(return_type, types.Buffer):
         # Build array view
@@ -475,7 +479,8 @@ def setitem_array(context, builder, sig, args):
                                              index_types, indices)
     try:
         dataptr, shapes, strides = \
-            basic_indexing(context, builder, aryty, ary, index_types, indices)
+            basic_indexing(context, builder, aryty, ary, index_types, indices,
+                           boundscheck=context.enable_boundscheck)
     except NotImplementedError:
         use_fancy_indexing = True
     else:
@@ -1025,8 +1030,9 @@ def fancy_getitem(context, builder, sig, args,
 
     # No need to check for wraparound, as the indexers all ensure
     # a positive index is returned.
-    ptr = cgutils.get_item_pointer2(builder, data, shapes, strides,
-                                    aryty.layout, indices, wraparound=False)
+    ptr = cgutils.get_item_pointer2(context, builder, data, shapes, strides,
+                                    aryty.layout, indices, wraparound=False,
+                                    boundscheck=context.enable_boundscheck)
     val = load_item(context, builder, aryty, ptr)
 
     # Since the destination is C-contiguous, no need for multi-dimensional
@@ -1162,10 +1168,10 @@ def maybe_copy_source(context, builder, use_copy,
         intp_t = context.get_value_type(types.intp)
 
         with cgutils.loop_nest(builder, src_shapes, intp_t) as indices:
-            src_ptr = cgutils.get_item_pointer2(builder, src_data,
+            src_ptr = cgutils.get_item_pointer2(context, builder, src_data,
                                                 src_shapes, src_strides,
                                                 srcty.layout, indices)
-            dest_ptr = cgutils.get_item_pointer2(builder, data,
+            dest_ptr = cgutils.get_item_pointer2(context, builder, data,
                                                  copy_shapes, copy_strides,
                                                  copy_layout, indices)
             builder.store(builder.load(src_ptr), dest_ptr)
@@ -1176,14 +1182,15 @@ def src_getitem(source_indices):
         with builder.if_else(use_copy, likely=False) as (if_copy, otherwise):
             with if_copy:
                 builder.store(
-                    cgutils.get_item_pointer2(builder, builder.load(copy_data),
+                    cgutils.get_item_pointer2(context, builder,
+                                              builder.load(copy_data),
                                               copy_shapes, copy_strides,
                                               copy_layout, source_indices,
                                               wraparound=False),
                     src_ptr)
             with otherwise:
                 builder.store(
-                    cgutils.get_item_pointer2(builder, src_data,
+                    cgutils.get_item_pointer2(context, builder, src_data,
                                               src_shapes, src_strides,
                                               srcty.layout, source_indices,
                                               wraparound=False),
@@ -1391,7 +1398,7 @@ def src_cleanup():
 
     # No need to check for wraparound, as the indexers all ensure
     # a positive index is returned.
-    dest_ptr = cgutils.get_item_pointer2(builder, dest_data,
+    dest_ptr = cgutils.get_item_pointer2(context, builder, dest_data,
                                          dest_shapes, dest_strides,
                                          aryty.layout, dest_indices,
                                          wraparound=False)
@@ -2614,7 +2621,7 @@ class IndexedSubIter(BaseSubIter):
 
         def compute_pointer(self, context, builder, indices, arrty, arr):
             assert len(indices) == self.ndim
-            return cgutils.get_item_pointer(builder, arrty, arr,
+            return cgutils.get_item_pointer(context, builder, arrty, arr,
                                             indices, wraparound=False)
 
     class ZeroDimSubIter(BaseSubIter):
@@ -3116,8 +3123,9 @@ def _ptr_for_index(self, context, builder, arrty, arr, index):
                     index = builder.udiv(index, shapes[dim])
                 indices.reverse()
 
-                ptr = cgutils.get_item_pointer2(builder, arr.data, shapes,
-                                                strides, arrty.layout, indices)
+                ptr = cgutils.get_item_pointer2(context, builder, arr.data,
+                                                shapes, strides, arrty.layout,
+                                                indices)
                 return ptr
 
             def getitem(self, context, builder, arrty, arr, index):
@@ -3840,6 +3848,8 @@ def numpy_linspace_3(context, builder, sig, args):
 
     def linspace(start, stop, num):
         arr = np.empty(num, dtype)
+        if num == 0:
+            return arr
         div = num - 1
         delta = stop - start
         arr[0] = start
@@ -3877,10 +3887,10 @@ def _array_copy(context, builder, sig, args):
         intp_t = context.get_value_type(types.intp)
 
         with cgutils.loop_nest(builder, shapes, intp_t) as indices:
-            src_ptr = cgutils.get_item_pointer2(builder, src_data,
+            src_ptr = cgutils.get_item_pointer2(context, builder, src_data,
                                                 shapes, src_strides,
                                                 arytype.layout, indices)
-            dest_ptr = cgutils.get_item_pointer2(builder, dest_data,
+            dest_ptr = cgutils.get_item_pointer2(context, builder, dest_data,
                                                  shapes, dest_strides,
                                                  rettype.layout, indices)
             builder.store(builder.load(src_ptr), dest_ptr)
@@ -3990,10 +4000,10 @@ def array_astype(context, builder, sig, args):
     intp_t = context.get_value_type(types.intp)
 
     with cgutils.loop_nest(builder, shapes, intp_t) as indices:
-        src_ptr = cgutils.get_item_pointer2(builder, src_data,
+        src_ptr = cgutils.get_item_pointer2(context, builder, src_data,
                                             shapes, src_strides,
                                             arytype.layout, indices)
-        dest_ptr = cgutils.get_item_pointer2(builder, dest_data,
+        dest_ptr = cgutils.get_item_pointer2(context, builder, dest_data,
                                              shapes, dest_strides,
                                              rettype.layout, indices)
         item = load_item(context, builder, arytype, src_ptr)
@@ -4205,7 +4215,7 @@ def assign_sequence_to_array(context, builder, data, shapes, strides,
     """
 
     def assign_item(indices, valty, val):
-        ptr = cgutils.get_item_pointer2(builder, data, shapes, strides,
+        ptr = cgutils.get_item_pointer2(context, builder, data, shapes, strides,
                                         arrty.layout, indices, wraparound=False)
         val = context.cast(builder, val, valty, arrty.dtype)
         store_item(context, builder, arrty, val, ptr)
@@ -4481,12 +4491,12 @@ def _do_concatenate(context, builder, axis,
                                       order=retty.layout)
 
         with loop_nest as indices:
-            src_ptr = cgutils.get_item_pointer2(builder, arr_data,
+            src_ptr = cgutils.get_item_pointer2(context, builder, arr_data,
                                                 arr_sh, arr_st,
                                                 arrty.layout, indices)
             val = load_item(context, builder, arrty, src_ptr)
             val = context.cast(builder, val, arrty.dtype, retty.dtype)
-            dest_ptr = cgutils.get_item_pointer2(builder, ret_data,
+            dest_ptr = cgutils.get_item_pointer2(context, builder, ret_data,
                                                  ret_shapes, ret_strides,
                                                  retty.layout, indices)
             store_item(context, builder, retty, val, dest_ptr)
diff --git a/numba/targets/base.py b/numba/targets/base.py
index 98122fd0589..a7b52d69e52 100644
--- a/numba/targets/base.py
+++ b/numba/targets/base.py
@@ -14,7 +14,7 @@
 from llvmlite.llvmpy.core import Type, Constant, LLVMException
 import llvmlite.binding as ll
 
-from numba import types, utils, cgutils, typing, funcdesc, debuginfo
+from numba import types, utils, cgutils, typing, funcdesc, debuginfo, config
 from numba import _dynfunc, _helperlib
 from numba.compiler_lock import global_compiler_lock
 from numba.pythonapi import PythonAPI
@@ -197,7 +197,15 @@ class BaseContext(object):
     DIBuilder = debuginfo.DIBuilder
 
     # Bound checking
-    enable_boundcheck = False
+    @property
+    def enable_boundscheck(self):
+        """Context flag unless config.BOUNDSCHECK globally overrides it."""
+        override = config.BOUNDSCHECK
+        return self._boundscheck if override is None else override
+
+    @enable_boundscheck.setter
+    def enable_boundscheck(self, value):
+        self._boundscheck = value
 
     # NRT
     enable_nrt = False
@@ -244,6 +252,8 @@ def __init__(self, typing_context):
         self._pid = None
         self._codelib_stack = []
 
+        self._boundscheck = False
+
         self.data_model_manager = datamodel.default_manager
 
         # Initialize
diff --git a/numba/targets/cpu.py b/numba/targets/cpu.py
index 5435dbda6ef..b3cfafc9d03 100644
--- a/numba/targets/cpu.py
+++ b/numba/targets/cpu.py
@@ -208,7 +208,7 @@ class CPUTargetOptions(TargetOptions):
         "nogil": bool,
         "forceobj": bool,
         "looplift": bool,
-        "boundcheck": bool,
+        "boundscheck": bool,
         "debug": bool,
         "_nrt": bool,
         "no_rewrites": bool,
diff --git a/numba/targets/npyimpl.py b/numba/targets/npyimpl.py
index d31008cdc77..279ffa66fd8 100644
--- a/numba/targets/npyimpl.py
+++ b/numba/targets/npyimpl.py
@@ -136,7 +136,8 @@ def create_iter_indices(self):
         return _ArrayIndexingHelper(self, indices)
 
     def _load_effective_address(self, indices):
-        return cgutils.get_item_pointer2(self.builder,
+        return cgutils.get_item_pointer2(self.context,
+                                         self.builder,
                                          data=self.data,
                                          shape=self.shape,
                                          strides=self.strides,
diff --git a/numba/targets/options.py b/numba/targets/options.py
index 0acd7a16c54..7f3fe9efb27 100644
--- a/numba/targets/options.py
+++ b/numba/targets/options.py
@@ -44,15 +44,15 @@ def set_flags(self, flags):
         if kws.pop('looplift', True):
             flags.set("enable_looplift")
 
-        if kws.pop('boundcheck', False):
-            flags.set("boundcheck")
+        if kws.pop('boundscheck', False):
+            flags.set("boundscheck")
 
         if kws.pop('_nrt', True):
             flags.set("nrt")
 
         if kws.pop('debug', config.DEBUGINFO_DEFAULT):
             flags.set("debuginfo")
-            flags.set("boundcheck")
+            flags.set("boundscheck")
 
         if kws.pop('nogil', False):
             flags.set("release_gil")
@@ -80,4 +80,3 @@ def set_flags(self, flags):
         if kws:
             # Unread options?
             raise NameError("Unrecognized options: %s" % kws.keys())
-
diff --git a/numba/tests/test_boundscheck.py b/numba/tests/test_boundscheck.py
new file mode 100644
index 00000000000..8ee117a8ffa
--- /dev/null
+++ b/numba/tests/test_boundscheck.py
@@ -0,0 +1,251 @@
+from __future__ import print_function, division, absolute_import
+
+import numpy as np
+
+from numba.compiler import compile_isolated, DEFAULT_FLAGS
+from numba import typeof, config, cuda, njit
+from numba.types import float64
+from numba import unittest_support as unittest
+from .support import MemoryLeakMixin, override_env_config
+
+BOUNDSCHECK_FLAGS = DEFAULT_FLAGS.copy()
+BOUNDSCHECK_FLAGS.set('boundscheck', True)  # defaults + bounds checking on
+
+
+def basic_array_access(a):
+    return a[10]  # out of bounds for the 5-element arrays the tests pass in
+
+
+def slice_array_access(a):
+    # Only the integer index can fail; the leading slice is never checked
+    return a[10:, 10]
+
+
+def fancy_array_access(x):
+    a = np.array([1, 2, 3])  # largest index is 3, so x needs >= 4 items
+    return x[a]
+
+
+class TestBoundsCheckNoError(MemoryLeakMixin, unittest.TestCase):
+    """Accesses that must not raise; raising ones: TestBoundsCheckError."""
+
+    def setUp(self):
+        self.old_boundscheck, config.BOUNDSCHECK = config.BOUNDSCHECK, None
+
+    def test_basic_array_boundscheck(self):
+        """Scalar index past the end, compiled with the default flags."""
+        arr = np.arange(5)
+        # Check the numpy behavior to make sure the test is correct
+        with self.assertRaises(IndexError):
+            # TODO: When we raise the same error message as numpy, test that
+            # they are the same
+            basic_array_access(arr)
+
+        cres = compile_isolated(basic_array_access, [typeof(arr)],
+                                flags=DEFAULT_FLAGS)
+        unchecked = cres.entry_point
+        # The default flags leave bounds checking off, so this must not raise
+        unchecked(arr)
+
+    def test_slice_array_boundscheck(self):
+        """Slice plus scalar index, compiled with and without the flag."""
+        small, wide = np.ones((5, 5)), np.ones((5, 20))
+        with self.assertRaises(IndexError):
+            # TODO: When we raise the same error message as numpy, test that
+            # they are the same
+            slice_array_access(small)
+        # Out of bounds on a slice doesn't raise
+        slice_array_access(wide)
+
+        argtype = typeof(small)
+        restype = float64[:]
+        cres_off = compile_isolated(slice_array_access, [argtype],
+                                    return_type=restype,
+                                    flags=DEFAULT_FLAGS)
+        unchecked = cres_off.entry_point
+        cres_on = compile_isolated(slice_array_access, [argtype],
+                                   return_type=restype,
+                                   flags=BOUNDSCHECK_FLAGS)
+        checked = cres_on.entry_point
+        # Without the flag neither array raises
+        for arr in (small, wide):
+            unchecked(arr)
+        # checked(small) is tested in TestBoundsCheckError below
+
+        # Doesn't raise
+        checked(wide)
+
+    def test_fancy_indexing_boundscheck(self):
+        """Index-array access, compiled with and without the flag."""
+        short, long_enough = np.arange(3), np.arange(4)
+
+        # Check the numpy behavior to ensure the test is correct.
+        with self.assertRaises(IndexError):
+            # TODO: When we raise the same error message as numpy, test that
+            # they are the same
+            fancy_array_access(short)
+        fancy_array_access(long_enough)
+
+        argtype = typeof(short)
+        restype = argtype.dtype[:]
+        cres_off = compile_isolated(fancy_array_access, [argtype],
+                                    return_type=restype,
+                                    flags=DEFAULT_FLAGS)
+        unchecked = cres_off.entry_point
+        cres_on = compile_isolated(fancy_array_access, [argtype],
+                                   return_type=restype,
+                                   flags=BOUNDSCHECK_FLAGS)
+        checked = cres_on.entry_point
+        # Without the flag neither array raises
+        for arr in (short, long_enough):
+            unchecked(arr)
+        # checked(short) is tested in TestBoundsCheckError below
+
+        # Doesn't raise
+        checked(long_enough)
+
+    @unittest.skipIf(not cuda.is_available(), "NO CUDA")
+    def test_no_cuda_boundscheck(self):
+        """cuda.jit refuses boundscheck=True and ignores NUMBA_BOUNDSCHECK."""
+        with self.assertRaises(NotImplementedError):
+            @cuda.jit(boundscheck=True)
+            def func():
+                pass
+
+        with override_env_config('NUMBA_BOUNDSCHECK', '1'):
+            @cuda.jit
+            def func2(x, a):
+                a[1] = x[1]
+
+            dst, src = np.ones((1,)), np.zeros((1,))
+            # Out of bounds but doesn't raise (it does raise in the simulator,
+            # so skip there)
+            if not config.ENABLE_CUDASIM:
+                func2(src, dst)
+
+    def tearDown(self):
+        config.BOUNDSCHECK = self.old_boundscheck  # undo the reset from setUp
+
+
+class TestBoundsCheckError(unittest.TestCase):
+    """Out-of-bounds accesses that must raise with boundscheck enabled.
+
+    This is a separate test because the jitted functions that raise
+    exceptions have memory leaks.
+    """
+
+    def setUp(self):
+        self.old_boundscheck, config.BOUNDSCHECK = config.BOUNDSCHECK, None
+
+    def test_basic_array_boundscheck(self):
+        """Scalar index past the end raises IndexError."""
+        arr = np.arange(5)
+        # Check the numpy behavior to make sure the test is correct
+        with self.assertRaises(IndexError):
+            # TODO: When we raise the same error message as numpy, test that
+            # they are the same
+            basic_array_access(arr)
+
+        cres = compile_isolated(basic_array_access, [typeof(arr)],
+                                flags=BOUNDSCHECK_FLAGS)
+        checked = cres.entry_point
+
+        with self.assertRaises(IndexError):
+            checked(arr)
+
+    def test_slice_array_boundscheck(self):
+        """Scalar index past the end of a sliced array raises IndexError."""
+        small, wide = np.ones((5, 5)), np.ones((5, 20))
+        with self.assertRaises(IndexError):
+            # TODO: When we raise the same error message as numpy, test that
+            # they are the same
+            slice_array_access(small)
+        # Out of bounds on a slice doesn't raise
+        slice_array_access(wide)
+
+        cres = compile_isolated(slice_array_access, [typeof(small)],
+                                return_type=float64[:],
+                                flags=BOUNDSCHECK_FLAGS)
+        checked = cres.entry_point
+        with self.assertRaises(IndexError):
+            checked(small)
+
+    def test_fancy_indexing_boundscheck(self):
+        """Index-array access past the end raises IndexError."""
+        short, long_enough = np.arange(3), np.arange(4)
+
+        # Check the numpy behavior to ensure the test is correct.
+        with self.assertRaises(IndexError):
+            # TODO: When we raise the same error message as numpy, test that
+            # they are the same
+            fancy_array_access(short)
+        fancy_array_access(long_enough)
+
+        argtype = typeof(short)
+        cres = compile_isolated(fancy_array_access, [argtype],
+                                return_type=argtype.dtype[:],
+                                flags=BOUNDSCHECK_FLAGS)
+        checked = cres.entry_point
+        with self.assertRaises(IndexError):
+            checked(short)
+
+    def tearDown(self):
+        config.BOUNDSCHECK = self.old_boundscheck  # undo the reset from setUp
+
+
+class TestBoundsEnvironmentVariable(unittest.TestCase):
+    """Check that NUMBA_BOUNDSCHECK overrides the ``boundscheck`` option."""
+
+    def setUp(self):
+        self.old_boundscheck = config.BOUNDSCHECK
+        config.BOUNDSCHECK = None
+
+        @njit
+        def default(x):
+            return x[1]
+
+        @njit(boundscheck=False)
+        def off(x):
+            return x[1]
+
+        @njit(boundscheck=True)
+        def on(x):
+            return x[1]
+
+        self.default, self.off, self.on = default, off, on
+
+    def test_boundscheck_unset(self):
+        """An empty variable defers to each function's own flag."""
+        with override_env_config('NUMBA_BOUNDSCHECK', ''):
+            a = np.array([1])
+            # Doesn't raise
+            self.default(a)
+            self.off(a)
+
+            with self.assertRaises(IndexError):
+                self.on(a)
+
+    def test_boundscheck_enabled(self):
+        """``1`` makes every function raise, whatever its own flag."""
+        with override_env_config('NUMBA_BOUNDSCHECK', '1'):
+            a = np.array([1])
+            # Separate contexts: a shared assertRaises ends at the first raise
+            for func in (self.default, self.off, self.on):
+                with self.assertRaises(IndexError):
+                    func(a)
+
+    def test_boundscheck_disabled(self):
+        """``0`` silences every function, whatever its own flag."""
+        with override_env_config('NUMBA_BOUNDSCHECK', '0'):
+            a = np.array([1])
+            # Doesn't raise
+            self.default(a)
+            self.off(a)
+            self.on(a)
+
+    def tearDown(self):
+        config.BOUNDSCHECK = self.old_boundscheck
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/numba/tests/test_jit_module.py b/numba/tests/test_jit_module.py
index 57f059adaa1..05e74d2ae9e 100644
--- a/numba/tests/test_jit_module.py
+++ b/numba/tests/test_jit_module.py
@@ -129,6 +129,7 @@ def test_jit_module_jit_options(self):
         jit_options = {"nopython": True,
                        "nogil": False,
                        "error_model": "numpy",
+                       "boundscheck": False,
                        }
         with self.create_temp_jitted_module(**jit_options) as test_module:
             self.assertEqual(test_module.inc.targetoptions, jit_options)
@@ -148,13 +149,15 @@ def add(x, y):
 """
         jit_options = {"nopython": True,
                        "error_model": "numpy",
+                       "boundscheck": False,
                        }
         with self.create_temp_jitted_module(source_lines=source_lines,
                                             **jit_options) as test_module:
             self.assertEqual(test_module.add.targetoptions, jit_options)
             # Test that manual jit-wrapping overrides jit_module options
             self.assertEqual(test_module.inc.targetoptions,
-                             {'nogil': True, 'forceobj': True})
+                             {'nogil': True, 'forceobj': True,
+                              'boundscheck': False})
 
     def test_jit_module_logging_output(self):
         logger = logging.getLogger('numba.decorators')
diff --git a/numba/tests/test_parfors.py b/numba/tests/test_parfors.py
index 41e2b76727a..f85acbec764 100644
--- a/numba/tests/test_parfors.py
+++ b/numba/tests/test_parfors.py
@@ -20,7 +20,7 @@
 
 import numba
 from numba import unittest_support as unittest
-from .support import TestCase, captured_stdout, MemoryLeakMixin
+from .support import TestCase, captured_stdout, MemoryLeakMixin, override_env_config
 from numba import njit, prange, stencil, inline_closurecall
 from numba import compiler, typing, errors, typed_passes
 from numba.targets import cpu
@@ -2400,11 +2400,13 @@ def will_vectorize(A):
 
         arg = np.zeros(10)
 
-        novec_asm = self.get_gufunc_asm(will_not_vectorize, 'signed', arg,
-                                        fastmath=True)
+        # Boundschecking breaks vectorization
+        with override_env_config('NUMBA_BOUNDSCHECK', '0'):
+            novec_asm = self.get_gufunc_asm(will_not_vectorize, 'signed', arg,
+                                            fastmath=True)
 
-        vec_asm = self.get_gufunc_asm(will_vectorize, 'unsigned', arg,
-                                      fastmath=True)
+            vec_asm = self.get_gufunc_asm(will_vectorize, 'unsigned', arg,
+                                          fastmath=True)
 
         for v in novec_asm.values():
             # vector variant should not be present
@@ -2447,10 +2449,12 @@ def unsigned_variant():
                 A += i
             return A
 
-        signed_asm = self.get_gufunc_asm(signed_variant, 'signed',
-                                         fastmath=True)
-        unsigned_asm = self.get_gufunc_asm(unsigned_variant, 'unsigned',
-                                           fastmath=True)
+        # Boundschecking breaks the diff check below because of the pickled exception
+        with override_env_config('NUMBA_BOUNDSCHECK', '0'):
+            signed_asm = self.get_gufunc_asm(signed_variant, 'signed',
+                                             fastmath=True)
+            unsigned_asm = self.get_gufunc_asm(unsigned_variant, 'unsigned',
+                                               fastmath=True)
 
         def strip_instrs(asm):
             acc = []