Skip to content

Commit

Permalink
gh-89653: PEP 670: Convert unicodeobject.h macros to functions (#91705)
Browse files Browse the repository at this point in the history
Convert unicodeobject.h macros to static inline functions:

* PyUnicode_CHECK_INTERNED()
* PyUnicode_DATA(), _PyUnicode_COMPACT_DATA(),
  _PyUnicode_NONCOMPACT_DATA()
* PyUnicode_GET_LENGTH()
* PyUnicode_IS_ASCII()
* PyUnicode_IS_COMPACT()
* PyUnicode_IS_COMPACT_ASCII()
* PyUnicode_IS_READY()

Reorder functions to declare functions before their first usage.

Static inline functions are wrapped by macros which cast
arguments with _PyObject_CAST() to prevent introducing
new compiler warnings when passing "const PyObject*".
  • Loading branch information
vstinner committed Apr 21, 2022
1 parent 25e3574 commit 4e52c66
Showing 1 changed file with 61 additions and 41 deletions.
102 changes: 61 additions & 41 deletions Include/cpython/unicodeobject.h
Expand Up @@ -287,37 +287,80 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
#define SSTATE_INTERNED_IMMORTAL 2

/* Use only if you know it's a string */
#define PyUnicode_CHECK_INTERNED(op) \
(_PyASCIIObject_CAST(op)->state.interned)
static inline unsigned int PyUnicode_CHECK_INTERNED(PyObject *op) {
    /* Fetch the interned field from the string's state.  The caller must
       already know that op is a string. */
    unsigned int interned = _PyASCIIObject_CAST(op)->state.interned;
    return interned;
}
/* The wrapper macro casts its argument with _PyObject_CAST() before calling
   the function of the same name. */
#define PyUnicode_CHECK_INTERNED(op) PyUnicode_CHECK_INTERNED(_PyObject_CAST(op))

/* Fast check to determine whether an object is ready. Equivalent to:
PyUnicode_IS_COMPACT(op) || _PyUnicodeObject_CAST(op)->data.any */
static inline unsigned int PyUnicode_IS_READY(PyObject *op) {
    /* Fetch the ready field from the string's state. */
    unsigned int ready = _PyASCIIObject_CAST(op)->state.ready;
    return ready;
}
/* The wrapper macro casts its argument with _PyObject_CAST() before calling
   the function of the same name. */
#define PyUnicode_IS_READY(op) PyUnicode_IS_READY(_PyObject_CAST(op))

/* Return true if the string contains only ASCII characters, or 0 if not. The
string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be
ready. */
#define PyUnicode_IS_ASCII(op) \
(assert(PyUnicode_IS_READY(op)), \
_PyASCIIObject_CAST(op)->state.ascii)
static inline unsigned int PyUnicode_IS_ASCII(PyObject *op) {
    /* The string is asserted to be ready before its ascii field is read. */
    assert(PyUnicode_IS_READY(op));
    unsigned int ascii = _PyASCIIObject_CAST(op)->state.ascii;
    return ascii;
}
/* The wrapper macro casts its argument with _PyObject_CAST() before calling
   the function of the same name. */
#define PyUnicode_IS_ASCII(op) PyUnicode_IS_ASCII(_PyObject_CAST(op))

/* Return true if the string is compact or 0 if not.
No type checks or Ready calls are performed. */
#define PyUnicode_IS_COMPACT(op) \
(_PyASCIIObject_CAST(op)->state.compact)
static inline unsigned int PyUnicode_IS_COMPACT(PyObject *op) {
    /* Fetch the compact field from the string's state. */
    unsigned int compact = _PyASCIIObject_CAST(op)->state.compact;
    return compact;
}
/* The wrapper macro casts its argument with _PyObject_CAST() before calling
   the function of the same name. */
#define PyUnicode_IS_COMPACT(op) PyUnicode_IS_COMPACT(_PyObject_CAST(op))

/* Return true if the string is a compact ASCII string (use PyASCIIObject
structure), or 0 if not. No type checks or Ready calls are performed. */
#define PyUnicode_IS_COMPACT_ASCII(op) \
(_PyASCIIObject_CAST(op)->state.ascii && PyUnicode_IS_COMPACT(op))
static inline int PyUnicode_IS_COMPACT_ASCII(PyObject *op) {
    /* Both the ascii and the compact fields must be set.  The compact field
       is not consulted when the ascii field is clear. */
    if (!_PyASCIIObject_CAST(op)->state.ascii) {
        return 0;
    }
    return PyUnicode_IS_COMPACT(op) ? 1 : 0;
}
/* The wrapper macro casts its argument with _PyObject_CAST() before calling
   the function of the same name. */
#define PyUnicode_IS_COMPACT_ASCII(op) PyUnicode_IS_COMPACT_ASCII(_PyObject_CAST(op))

/* Kinds of string representation, as reported by PyUnicode_KIND(). */
enum PyUnicode_Kind {
/* String contains only wstr byte characters. This is only possible
when the string was created with a legacy API and _PyUnicode_Ready()
has not been called yet. */
PyUnicode_WCHAR_KIND = 0,
/* Return values of PyUnicode_KIND(): */
PyUnicode_1BYTE_KIND = 1,
PyUnicode_2BYTE_KIND = 2,
PyUnicode_4BYTE_KIND = 4
};

/* Return one of the PyUnicode_*_KIND values defined above.
   The two asserts check that op passes PyUnicode_Check() and
   PyUnicode_IS_READY() before the kind field is read.  op appears three
   times in the expansion (twice inside assert()), so avoid passing an
   expression with side effects. */
#define PyUnicode_KIND(op) \
(assert(PyUnicode_Check(op)), \
assert(PyUnicode_IS_READY(op)), \
((PyASCIIObject *)(op))->state.kind)

/* Return a void pointer to the raw unicode buffer. */
static inline void* _PyUnicode_COMPACT_DATA(PyObject *op) {
    /* The returned address is one element past the cast object pointer;
       which cast is used depends on whether the string is ASCII. */
    void *data;
    if (!PyUnicode_IS_ASCII(op)) {
        data = (void*)(_PyCompactUnicodeObject_CAST(op) + 1);
    }
    else {
        data = (void*)(_PyASCIIObject_CAST(op) + 1);
    }
    return data;
}

static inline void* _PyUnicode_NONCOMPACT_DATA(PyObject *op) {
    /* Return the data.any pointer of the object; it is asserted to be
       non-NULL. */
    void *buffer = _PyUnicodeObject_CAST(op)->data.any;
    assert(buffer != NULL);
    return buffer;
}

static inline void* PyUnicode_DATA(PyObject *op) {
    /* Pick the accessor that matches the string's compact flag. */
    if (!PyUnicode_IS_COMPACT(op)) {
        return _PyUnicode_NONCOMPACT_DATA(op);
    }
    return _PyUnicode_COMPACT_DATA(op);
}
/* The wrapper macro casts its argument with _PyObject_CAST() before calling
   the function of the same name. */
#define PyUnicode_DATA(op) PyUnicode_DATA(_PyObject_CAST(op))

/* Return pointers to the canonical representation cast to unsigned char,
Py_UCS2, or Py_UCS4 for direct character access.
No checks are performed, use PyUnicode_KIND() before to ensure
Expand All @@ -327,24 +370,14 @@ enum PyUnicode_Kind {
/* PyUnicode_DATA(op) cast to a Py_UCS2 pointer. */
#define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op))
/* PyUnicode_DATA(op) cast to a Py_UCS4 pointer. */
#define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op))

/* Return one of the PyUnicode_*_KIND values defined above. */
#define PyUnicode_KIND(op) \
(assert(PyUnicode_IS_READY(op)), \
_PyASCIIObject_CAST(op)->state.kind)

/* Return a void pointer to the raw unicode buffer. */
#define _PyUnicode_COMPACT_DATA(op) \
(PyUnicode_IS_ASCII(op) ? \
((void*)(_PyASCIIObject_CAST(op) + 1)) : \
((void*)(_PyCompactUnicodeObject_CAST(op) + 1)))

#define _PyUnicode_NONCOMPACT_DATA(op) \
(assert(_PyUnicodeObject_CAST(op)->data.any), \
(_PyUnicodeObject_CAST(op)->data.any))

#define PyUnicode_DATA(op) \
(PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) : \
_PyUnicode_NONCOMPACT_DATA(op))
/* Returns the length of the unicode string. The caller has to make sure that
the string has its canonical representation set before calling
this function. Call PyUnicode_(FAST_)Ready to ensure that. */
static inline Py_ssize_t PyUnicode_GET_LENGTH(PyObject *op) {
    /* The string is asserted to be ready before its length field is read. */
    assert(PyUnicode_IS_READY(op));
    Py_ssize_t length = _PyASCIIObject_CAST(op)->length;
    return length;
}
/* The wrapper macro casts its argument with _PyObject_CAST() before calling
   the function of the same name. */
#define PyUnicode_GET_LENGTH(op) PyUnicode_GET_LENGTH(_PyObject_CAST(op))

/* In the access macros below, "kind" may be evaluated more than once.
All other macro parameters are evaluated exactly once, so it is safe
Expand Down Expand Up @@ -400,19 +433,6 @@ enum PyUnicode_Kind {
) \
))

/* Returns the length of the unicode string. The caller has to make sure that
the string has it's canonical representation set before calling
this macro. Call PyUnicode_(FAST_)Ready to ensure that. */
#define PyUnicode_GET_LENGTH(op) \
(assert(PyUnicode_IS_READY(op)), \
_PyASCIIObject_CAST(op)->length)


/* Fast check to determine whether an object is ready. Equivalent to
PyUnicode_IS_COMPACT(op) || _PyUnicodeObject_CAST(op)->data.any */

#define PyUnicode_IS_READY(op) (_PyASCIIObject_CAST(op)->state.ready)

/* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best
case. If the canonical representation is not yet set, it will still call
_PyUnicode_Ready().
Expand Down

0 comments on commit 4e52c66

Please sign in to comment.