Skip to content

Commit

Permalink
Merge pull request #6570 from charris/backport-6562
Browse files Browse the repository at this point in the history
Backport 6562
  • Loading branch information
charris committed Oct 27, 2015
2 parents e0ca6c3 + f1eca4b commit 2093126
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 217 deletions.
137 changes: 13 additions & 124 deletions numpy/core/_internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,94 +302,6 @@ def _copy_fields(ary):
'formats': [dt.fields[name][0] for name in dt.names]}
return array(ary, dtype=copy_dtype, copy=True)

def _get_all_field_offsets(dtype, base_offset=0):
""" Returns the types and offsets of all fields in a (possibly structured)
data type, including nested fields and subarrays.
Parameters
----------
dtype : data-type
Data type to extract fields from.
base_offset : int, optional
Additional offset to add to all field offsets.
Returns
-------
fields : list of (data-type, int) pairs
A flat list of (dtype, byte offset) pairs.
"""
fields = []
if dtype.fields is not None:
for name in dtype.names:
sub_dtype = dtype.fields[name][0]
sub_offset = dtype.fields[name][1] + base_offset
fields.extend(_get_all_field_offsets(sub_dtype, sub_offset))
else:
if dtype.shape:
sub_offsets = _get_all_field_offsets(dtype.base, base_offset)
count = 1
for dim in dtype.shape:
count *= dim
fields.extend((typ, off + dtype.base.itemsize*j)
for j in range(count) for (typ, off) in sub_offsets)
else:
fields.append((dtype, base_offset))
return fields

def _check_field_overlap(new_fields, old_fields):
""" Perform object memory overlap tests for two data-types (see
_view_is_safe).
This function checks that new fields only access memory contained in old
fields, and that non-object fields are not interpreted as objects and vice
versa.
Parameters
----------
new_fields : list of (data-type, int) pairs
Flat list of (dtype, byte offset) pairs for the new data type, as
returned by _get_all_field_offsets.
old_fields: list of (data-type, int) pairs
Flat list of (dtype, byte offset) pairs for the old data type, as
returned by _get_all_field_offsets.
Raises
------
TypeError
If the new fields are incompatible with the old fields
"""

#first go byte by byte and check we do not access bytes not in old_fields
new_bytes = set()
for tp, off in new_fields:
new_bytes.update(set(range(off, off+tp.itemsize)))
old_bytes = set()
for tp, off in old_fields:
old_bytes.update(set(range(off, off+tp.itemsize)))
if new_bytes.difference(old_bytes):
raise TypeError("view would access data parent array doesn't own")

#next check that we do not interpret non-Objects as Objects, and vv
obj_offsets = [off for (tp, off) in old_fields if tp.type is object_]
obj_size = dtype(object_).itemsize

for fld_dtype, fld_offset in new_fields:
if fld_dtype.type is object_:
# check we do not create object views where
# there are no objects.
if fld_offset not in obj_offsets:
raise TypeError("cannot view non-Object data as Object type")
else:
# next check we do not create non-object views
# where there are already objects.
# see validate_object_field_overlap for a similar computation.
for obj_offset in obj_offsets:
if (fld_offset < obj_offset + obj_size and
obj_offset < fld_offset + fld_dtype.itemsize):
raise TypeError("cannot view Object as non-Object type")

def _getfield_is_safe(oldtype, newtype, offset):
""" Checks safety of getfield for object arrays.
Expand All @@ -411,24 +323,23 @@ def _getfield_is_safe(oldtype, newtype, offset):
If the field access is invalid
"""
new_fields = _get_all_field_offsets(newtype, offset)
old_fields = _get_all_field_offsets(oldtype)
# raises if there is a problem
_check_field_overlap(new_fields, old_fields)
if newtype.hasobject or oldtype.hasobject:
if offset == 0 and newtype == oldtype:
return
if oldtype.names:
for name in oldtype.names:
if (oldtype.fields[name][1] == offset and
oldtype.fields[name][0] == newtype):
return
raise TypeError("Cannot get/set field of an object array")
return

def _view_is_safe(oldtype, newtype):
""" Checks safety of a view involving object arrays, for example when
doing::
np.zeros(10, dtype=oldtype).view(newtype)
We need to check that
1) No memory that is not an object will be interpreted as a object,
2) No memory containing an object will be interpreted as an arbitrary type.
Both cases can cause segfaults, eg in the case the view is written to.
Strategy here is to also disallow views where newtype has any field in a
place oldtype doesn't.
Parameters
----------
oldtype : data-type
Expand All @@ -448,31 +359,9 @@ def _view_is_safe(oldtype, newtype):
if oldtype == newtype:
return

new_fields = _get_all_field_offsets(newtype)
new_size = newtype.itemsize

old_fields = _get_all_field_offsets(oldtype)
old_size = oldtype.itemsize

# if the itemsizes are not equal, we need to check that all the
# 'tiled positions' of the object match up. Here, we allow
# for arbirary itemsizes (even those possibly disallowed
# due to stride/data length issues).
if old_size == new_size:
new_num = old_num = 1
else:
gcd_new_old = _gcd(new_size, old_size)
new_num = old_size // gcd_new_old
old_num = new_size // gcd_new_old

# get position of fields within the tiling
new_fieldtile = [(tp, off + new_size*j)
for j in range(new_num) for (tp, off) in new_fields]
old_fieldtile = [(tp, off + old_size*j)
for j in range(old_num) for (tp, off) in old_fields]

# raises if there is a problem
_check_field_overlap(new_fieldtile, old_fieldtile)
if newtype.hasobject or oldtype.hasobject:
raise TypeError("Cannot change data-type for object array.")
return

# Given a string containing a PEP 3118 format specifier,
# construct a Numpy dtype
Expand Down
83 changes: 0 additions & 83 deletions numpy/core/tests/test_multiarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -5726,89 +5726,6 @@ def test_collections_hashable(self):
x = np.array([])
self.assertFalse(isinstance(x, collections.Hashable))

from numpy.core._internal import _view_is_safe

class TestObjViewSafetyFuncs(TestCase):
def test_view_safety(self):
psize = np.dtype('p').itemsize

# creates dtype but with extra character code - for missing 'p' fields
def mtype(s):
n, offset, fields = 0, 0, []
for c in s.split(','): # subarrays won't work
if c != '-':
fields.append(('f{0}'.format(n), c, offset))
n += 1
offset += np.dtype(c).itemsize if c != '-' else psize

names, formats, offsets = zip(*fields)
return np.dtype({'names': names, 'formats': formats,
'offsets': offsets, 'itemsize': offset})

# test nonequal itemsizes with objects:
# these should succeed:
_view_is_safe(np.dtype('O,p,O,p'), np.dtype('O,p,O,p,O,p'))
_view_is_safe(np.dtype('O,O'), np.dtype('O,O,O'))
# these should fail:
assert_raises(TypeError, _view_is_safe, np.dtype('O,O,p'), np.dtype('O,O'))
assert_raises(TypeError, _view_is_safe, np.dtype('O,O,p'), np.dtype('O,p'))
assert_raises(TypeError, _view_is_safe, np.dtype('O,O,p'), np.dtype('p,O'))

# test nonequal itemsizes with missing fields:
# these should succeed:
_view_is_safe(mtype('-,p,-,p'), mtype('-,p,-,p,-,p'))
_view_is_safe(np.dtype('p,p'), np.dtype('p,p,p'))
# these should fail:
assert_raises(TypeError, _view_is_safe, mtype('p,p,-'), mtype('p,p'))
assert_raises(TypeError, _view_is_safe, mtype('p,p,-'), mtype('p,-'))
assert_raises(TypeError, _view_is_safe, mtype('p,p,-'), mtype('-,p'))

# scans through positions at which we can view a type
def scanView(d1, otype):
goodpos = []
for shift in range(d1.itemsize - np.dtype(otype).itemsize+1):
d2 = np.dtype({'names': ['f0'], 'formats': [otype],
'offsets': [shift], 'itemsize': d1.itemsize})
try:
_view_is_safe(d1, d2)
except TypeError:
pass
else:
goodpos.append(shift)
return goodpos

# test partial overlap with object field
assert_equal(scanView(np.dtype('p,O,p,p,O,O'), 'p'),
[0] + list(range(2*psize, 3*psize+1)))
assert_equal(scanView(np.dtype('p,O,p,p,O,O'), 'O'),
[psize, 4*psize, 5*psize])

# test partial overlap with missing field
assert_equal(scanView(mtype('p,-,p,p,-,-'), 'p'),
[0] + list(range(2*psize, 3*psize+1)))

# test nested structures with objects:
nestedO = np.dtype([('f0', 'p'), ('f1', 'p,O,p')])
assert_equal(scanView(nestedO, 'p'), list(range(psize+1)) + [3*psize])
assert_equal(scanView(nestedO, 'O'), [2*psize])

# test nested structures with missing fields:
nestedM = np.dtype([('f0', 'p'), ('f1', mtype('p,-,p'))])
assert_equal(scanView(nestedM, 'p'), list(range(psize+1)) + [3*psize])

# test subarrays with objects
subarrayO = np.dtype('p,(2,3)O,p')
assert_equal(scanView(subarrayO, 'p'), [0, 7*psize])
assert_equal(scanView(subarrayO, 'O'),
list(range(psize, 6*psize+1, psize)))

#test dtype with overlapping fields
overlapped = np.dtype({'names': ['f0', 'f1', 'f2', 'f3'],
'formats': ['p', 'p', 'p', 'p'],
'offsets': [0, 1, 3*psize-1, 3*psize],
'itemsize': 4*psize})
assert_equal(scanView(overlapped, 'p'), [0, 1, 3*psize-1, 3*psize])


class TestArrayPriority(TestCase):
# This will go away when __array_priority__ is settled, meanwhile
Expand Down
10 changes: 0 additions & 10 deletions numpy/lib/tests/test_recfunctions.py
Original file line number Diff line number Diff line change
Expand Up @@ -720,15 +720,5 @@ def test_append_to_objects(self):
dtype=[('A', object), ('B', float), ('C', int)])
assert_equal(test, control)

def test_append_with_objects(self):
"Test append_fields when the appended data contains objects"
obj = self.data['obj']
x = np.array([(10, 1.), (20, 2.)], dtype=[('A', int), ('B', float)])
y = np.array([obj, obj], dtype=object)
test = append_fields(x, 'C', data=y, dtypes=object, usemask=False)
control = np.array([(10, 1.0, obj), (20, 2.0, obj)],
dtype=[('A', int), ('B', float), ('C', object)])
assert_equal(test, control)

if __name__ == '__main__':
run_module_suite()

0 comments on commit 2093126

Please sign in to comment.