Skip to content

Commit

Permalink
bpo-33312: Fix clang ubsan out of bounds warnings in dict. (pythonGH-…
Browse files Browse the repository at this point in the history
…6537)

Fix clang ubsan (undefined behavior sanitizer) warnings in dictobject.c by
adjusting how the internal struct _dictkeysobject shared keys structure is
declared.

This remains ABI compatible.  We get rid of the union at the end of the
struct being used for conveinence to avoid typecasting in favor of char[]
variable length array at the end of a struct. This is known to clang to be
used for variable sized objects and will not cause an undefined behavior
problem.  Similarly, char arrays do not have strict aliasing undefined
behavior when cast.

PEP-007 does not currently list variable length arrays (VLAs) as allowed
in our subset of C99.  If this turns out to be a problem, the fix to this is
to change the char `dk_indices[]` into `dk_indices[1]` and restore the
three size computation subtractions this change removes:
  `- Py_MEMBER_SIZE(PyDictKeysObject, dk_indices)`

If this works as is I'll make a separate PR to update PEP-007.
  • Loading branch information
gpshead committed Apr 20, 2018
1 parent b87c1c9 commit 397f1b2
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 24 deletions.
@@ -0,0 +1,3 @@
Fixed clang ubsan (undefined behavior sanitizer) warnings in dictobject.c by
adjusting how the internal struct _dictkeysobject shared keys structure is
declared.
11 changes: 2 additions & 9 deletions Objects/dict-common.h
Expand Up @@ -58,15 +58,8 @@ struct _dictkeysobject {
- 4 bytes if dk_size <= 0xffffffff (int32_t*)
- 8 bytes otherwise (int64_t*)
Dynamically sized, 8 is minimum. */
union {
int8_t as_1[8];
int16_t as_2[4];
int32_t as_4[2];
#if SIZEOF_VOID_P > 4
int64_t as_8[1];
#endif
} dk_indices;
Dynamically sized, SIZEOF_VOID_P is minimum. */
char dk_indices[]; /* char is required to avoid strict aliasing. */

/* "PyDictKeyEntry dk_entries[dk_usable];" array follows:
see the DK_ENTRIES() macro */
Expand Down
27 changes: 12 additions & 15 deletions Objects/dictobject.c
Expand Up @@ -298,7 +298,7 @@ PyDict_Fini(void)
2 : sizeof(int32_t))
#endif
#define DK_ENTRIES(dk) \
((PyDictKeyEntry*)(&(dk)->dk_indices.as_1[DK_SIZE(dk) * DK_IXSIZE(dk)]))
((PyDictKeyEntry*)(&((int8_t*)((dk)->dk_indices))[DK_SIZE(dk) * DK_IXSIZE(dk)]))

#define DK_DEBUG_INCREF _Py_INC_REFTOTAL _Py_REF_DEBUG_COMMA
#define DK_DEBUG_DECREF _Py_DEC_REFTOTAL _Py_REF_DEBUG_COMMA
Expand All @@ -316,21 +316,21 @@ dk_get_index(PyDictKeysObject *keys, Py_ssize_t i)
Py_ssize_t ix;

if (s <= 0xff) {
int8_t *indices = keys->dk_indices.as_1;
int8_t *indices = (int8_t*)(keys->dk_indices);
ix = indices[i];
}
else if (s <= 0xffff) {
int16_t *indices = keys->dk_indices.as_2;
int16_t *indices = (int16_t*)(keys->dk_indices);
ix = indices[i];
}
#if SIZEOF_VOID_P > 4
else if (s > 0xffffffff) {
int64_t *indices = keys->dk_indices.as_8;
int64_t *indices = (int64_t*)(keys->dk_indices);
ix = indices[i];
}
#endif
else {
int32_t *indices = keys->dk_indices.as_4;
int32_t *indices = (int32_t*)(keys->dk_indices);
ix = indices[i];
}
assert(ix >= DKIX_DUMMY);
Expand All @@ -346,23 +346,23 @@ dk_set_index(PyDictKeysObject *keys, Py_ssize_t i, Py_ssize_t ix)
assert(ix >= DKIX_DUMMY);

if (s <= 0xff) {
int8_t *indices = keys->dk_indices.as_1;
int8_t *indices = (int8_t*)(keys->dk_indices);
assert(ix <= 0x7f);
indices[i] = (char)ix;
}
else if (s <= 0xffff) {
int16_t *indices = keys->dk_indices.as_2;
int16_t *indices = (int16_t*)(keys->dk_indices);
assert(ix <= 0x7fff);
indices[i] = (int16_t)ix;
}
#if SIZEOF_VOID_P > 4
else if (s > 0xffffffff) {
int64_t *indices = keys->dk_indices.as_8;
int64_t *indices = (int64_t*)(keys->dk_indices);
indices[i] = ix;
}
#endif
else {
int32_t *indices = keys->dk_indices.as_4;
int32_t *indices = (int32_t*)(keys->dk_indices);
assert(ix <= 0x7fffffff);
indices[i] = (int32_t)ix;
}
Expand Down Expand Up @@ -421,8 +421,8 @@ static PyDictKeysObject empty_keys_struct = {
lookdict_split, /* dk_lookup */
0, /* dk_usable (immutable) */
0, /* dk_nentries */
.dk_indices = { .as_1 = {DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY,
DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY}},
{DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY,
DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY}, /* dk_indices */
};

static PyObject *empty_values[1] = { NULL };
Expand Down Expand Up @@ -530,7 +530,6 @@ static PyDictKeysObject *new_keys_object(Py_ssize_t size)
}
else {
dk = PyObject_MALLOC(sizeof(PyDictKeysObject)
- Py_MEMBER_SIZE(PyDictKeysObject, dk_indices)
+ es * size
+ sizeof(PyDictKeyEntry) * usable);
if (dk == NULL) {
Expand All @@ -543,7 +542,7 @@ static PyDictKeysObject *new_keys_object(Py_ssize_t size)
dk->dk_usable = usable;
dk->dk_lookup = lookdict_unicode_nodummy;
dk->dk_nentries = 0;
memset(&dk->dk_indices.as_1[0], 0xff, es * size);
memset(&dk->dk_indices[0], 0xff, es * size);
memset(DK_ENTRIES(dk), 0, sizeof(PyDictKeyEntry) * usable);
return dk;
}
Expand Down Expand Up @@ -3007,7 +3006,6 @@ _PyDict_SizeOf(PyDictObject *mp)
in the type object. */
if (mp->ma_keys->dk_refcnt == 1)
res += (sizeof(PyDictKeysObject)
- Py_MEMBER_SIZE(PyDictKeysObject, dk_indices)
+ DK_IXSIZE(mp->ma_keys) * size
+ sizeof(PyDictKeyEntry) * usable);
return res;
Expand All @@ -3017,7 +3015,6 @@ Py_ssize_t
_PyDict_KeysSize(PyDictKeysObject *keys)
{
return (sizeof(PyDictKeysObject)
- Py_MEMBER_SIZE(PyDictKeysObject, dk_indices)
+ DK_IXSIZE(keys) * DK_SIZE(keys)
+ USABLE_FRACTION(DK_SIZE(keys)) * sizeof(PyDictKeyEntry));
}
Expand Down

0 comments on commit 397f1b2

Please sign in to comment.