Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Add rest of unary ufuncs for unicode/bytes dtypes #25791

Merged
merged 11 commits into from
Feb 17, 2024
Merged
20 changes: 20 additions & 0 deletions numpy/_core/code_generators/generate_umath.py
Original file line number Diff line number Diff line change
Expand Up @@ -1170,6 +1170,26 @@ def english_upper(s):
docstrings.get('numpy._core.umath.isspace'),
None,
),
'isalnum':
Ufunc(1, 1, False_,
docstrings.get('numpy._core.umath.isalnum'),
None,
),
'islower':
Ufunc(1, 1, False_,
docstrings.get('numpy._core.umath.islower'),
None,
),
'isupper':
Ufunc(1, 1, False_,
docstrings.get('numpy._core.umath.isupper'),
None,
),
'istitle':
Ufunc(1, 1, False_,
docstrings.get('numpy._core.umath.istitle'),
None,
),
'isdecimal':
Ufunc(1, 1, False_,
docstrings.get('numpy._core.umath.isdecimal'),
Expand Down
119 changes: 119 additions & 0 deletions numpy/_core/code_generators/ufunc_docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -4441,6 +4441,125 @@ def add_newdoc(place, name, doc):

""")

# Docstring for the ``isalnum`` string ufunc. ``$PARAMS`` and
# ``$OUT_SCALAR_1`` are template placeholders (presumably expanded by
# ``add_newdoc`` -- confirm against its definition).
add_newdoc('numpy._core.umath', 'isalnum',
    """
    Returns true for each element if all characters in the string are
    alphanumeric and there is at least one character, false otherwise.

    Parameters
    ----------
    x : array_like, with `np.bytes_` or `np.str_` dtype
    $PARAMS

    Returns
    -------
    out : ndarray
        Output array of bool
        $OUT_SCALAR_1

    See Also
    --------
    str.isalnum

    Examples
    --------
    >>> a = np.array(['a', '1', 'a1', '(', ''])
    >>> np.strings.isalnum(a)
    array([ True,  True,  True, False, False])

    """)

# Docstring for the ``islower`` string ufunc. ``$PARAMS`` and
# ``$OUT_SCALAR_1`` are template placeholders (presumably expanded by
# ``add_newdoc`` -- confirm against its definition).
add_newdoc('numpy._core.umath', 'islower',
    """
    Returns true for each element if all cased characters in the
    string are lowercase and there is at least one cased character,
    false otherwise.

    Parameters
    ----------
    x : array_like, with `np.bytes_` or `np.str_` dtype
    $PARAMS

    Returns
    -------
    out : ndarray
        Output array of bools
        $OUT_SCALAR_1

    See Also
    --------
    str.islower

    Examples
    --------
    >>> np.strings.islower("GHC")
    array(False)
    >>> np.strings.islower("ghc")
    array(True)

    """)

# Docstring for the ``isupper`` string ufunc. ``$PARAMS`` and
# ``$OUT_SCALAR_1`` are template placeholders (presumably expanded by
# ``add_newdoc`` -- confirm against its definition).
# The summary says "at least one *cased* character": a string with no
# cased characters (e.g. "123") yields False, matching ``str.isupper``.
add_newdoc('numpy._core.umath', 'isupper',
    """
    Return true for each element if all cased characters in the
    string are uppercase and there is at least one cased character,
    false otherwise.

    Parameters
    ----------
    x : array_like, with `np.bytes_` or `np.str_` dtype
    $PARAMS

    Returns
    -------
    out : ndarray
        Output array of bools
        $OUT_SCALAR_1

    See Also
    --------
    str.isupper

    Examples
    --------
    >>> np.strings.isupper("GHC")
    array(True)
    >>> a = np.array(["hello", "HELLO", "Hello"])
    >>> np.strings.isupper(a)
    array([False,  True, False])

    """)

# Docstring for the ``istitle`` string ufunc. ``$PARAMS`` and
# ``$OUT_SCALAR_1`` are template placeholders (presumably expanded by
# ``add_newdoc`` -- confirm against its definition).
add_newdoc('numpy._core.umath', 'istitle',
    """
    Returns true for each element if the element is a titlecased
    string and there is at least one character, false otherwise.

    Parameters
    ----------
    x : array_like, with `np.bytes_` or `np.str_` dtype
    $PARAMS

    Returns
    -------
    out : ndarray
        Output array of bools
        $OUT_SCALAR_1

    See Also
    --------
    str.istitle

    Examples
    --------
    >>> np.strings.istitle("Numpy Is Great")
    array(True)

    >>> np.strings.istitle("Numpy is great")
    array(False)

    """)

add_newdoc('numpy._core.umath', 'isdecimal',
"""
For each element, return True if there are only decimal
Expand Down
2 changes: 1 addition & 1 deletion numpy/_core/src/common/numpyos.c
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ NumPyOS_ascii_isdigit(char c)
*
* Same as isalnum under C locale
*/
NPY_NO_EXPORT int
NumPyOS_ascii_isalnum(char c)
{
    /*
     * Declared NPY_NO_EXPORT (not file-static) so that it matches the
     * prototype in numpyos.h and can be called from other translation
     * units. Yields 1 if either helper reports nonzero for `c`, else 0.
     */
    return NumPyOS_ascii_isdigit(c) || NumPyOS_ascii_isalpha(c);
}
Expand Down
3 changes: 3 additions & 0 deletions numpy/_core/src/common/numpyos.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ NumPyOS_ascii_isalpha(char c);
NPY_NO_EXPORT int
NumPyOS_ascii_isdigit(char c);

/* Same as isalnum under the C locale. */
NPY_NO_EXPORT int
NumPyOS_ascii_isalnum(char c);

/* Convert a string to an int in an arbitrary base */
NPY_NO_EXPORT npy_longlong
NumPyOS_strtoll(const char *str, char **endptr, int base);
Expand Down
192 changes: 190 additions & 2 deletions numpy/_core/src/umath/string_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ enum class IMPLEMENTED_UNARY_FUNCTIONS {
ISDECIMAL,
ISDIGIT,
ISSPACE,
ISALNUM,
ISLOWER,
ISUPPER,
ISTITLE,
ISNUMERIC,
STR_LEN,
};
Expand Down Expand Up @@ -136,6 +140,106 @@ codepoint_isspace<ENCODING::UTF8>(npy_ucs4 code)
return Py_UNICODE_ISSPACE(code);
}

/*
 * Per-encoding test for whether a single code point is alphanumeric.
 * Only the explicit specializations below provide a definition.
 */
template<ENCODING enc>
inline bool
codepoint_isalnum(npy_ucs4 code);

/* ASCII: defer to NumPy's ASCII-only classifier. */
template<>
inline bool
codepoint_isalnum<ENCODING::ASCII>(npy_ucs4 code)
{
    /*
     * NumPyOS_ascii_isalnum takes a `char`; narrow explicitly, as the
     * other ASCII specializations in this header do, instead of relying
     * on an implicit npy_ucs4 -> char conversion.
     */
    return NumPyOS_ascii_isalnum(static_cast<char>(code)) != 0;
}

/* UTF32: use CPython's Unicode classification. */
template<>
inline bool
codepoint_isalnum<ENCODING::UTF32>(npy_ucs4 code)
{
    return Py_UNICODE_ISALNUM(code) != 0;
}

/* UTF8: same classification as UTF32, applied to the given code point. */
template<>
inline bool
codepoint_isalnum<ENCODING::UTF8>(npy_ucs4 code)
{
    return Py_UNICODE_ISALNUM(code) != 0;
}

/*
 * Per-encoding test for whether a single code point is lowercase.
 * Only the explicit specializations below provide a definition.
 */
template<ENCODING enc>
inline bool
codepoint_islower(npy_ucs4 code);

/* ASCII: CPython's byte classifier, applied to the value narrowed to char. */
template<>
inline bool
codepoint_islower<ENCODING::ASCII>(npy_ucs4 code)
{
    const char as_byte = static_cast<char>(code);
    return Py_ISLOWER(as_byte) != 0;
}

/* UTF32: CPython's Unicode classifier. */
template<>
inline bool
codepoint_islower<ENCODING::UTF32>(npy_ucs4 code)
{
    return Py_UNICODE_ISLOWER(code) != 0;
}

/* UTF8: same Unicode classifier as UTF32. */
template<>
inline bool
codepoint_islower<ENCODING::UTF8>(npy_ucs4 code)
{
    return Py_UNICODE_ISLOWER(code) != 0;
}

/*
 * Per-encoding test for whether a single code point is uppercase.
 * Only the explicit specializations below provide a definition.
 */
template<ENCODING enc>
inline bool
codepoint_isupper(npy_ucs4 code);

/* ASCII: CPython's byte classifier, applied to the value narrowed to char. */
template<>
inline bool
codepoint_isupper<ENCODING::ASCII>(npy_ucs4 code)
{
    const char as_byte = static_cast<char>(code);
    return Py_ISUPPER(as_byte) != 0;
}

/* UTF32: CPython's Unicode classifier. */
template<>
inline bool
codepoint_isupper<ENCODING::UTF32>(npy_ucs4 code)
{
    return Py_UNICODE_ISUPPER(code) != 0;
}

/* UTF8: same Unicode classifier as UTF32. */
template<>
inline bool
codepoint_isupper<ENCODING::UTF8>(npy_ucs4 code)
{
    return Py_UNICODE_ISUPPER(code) != 0;
}

/*
 * Per-encoding test for whether a single code point is titlecase.
 * Only the explicit specializations below provide a definition.
 */
template<ENCODING enc>
inline bool
codepoint_istitle(npy_ucs4);

/*
 * ASCII: always false -- no ASCII code point is reported as titlecase.
 * The parameter is left unnamed because it is never read; this avoids an
 * unused-parameter warning without changing the signature.
 */
template<>
inline bool
codepoint_istitle<ENCODING::ASCII>(npy_ucs4)
{
    return false;
}

/* UTF32: CPython's Unicode classifier. */
template<>
inline bool
codepoint_istitle<ENCODING::UTF32>(npy_ucs4 code)
{
    return Py_UNICODE_ISTITLE(code);
}

/* UTF8: same Unicode classifier as UTF32. */
template<>
inline bool
codepoint_istitle<ENCODING::UTF8>(npy_ucs4 code)
{
    return Py_UNICODE_ISTITLE(code);
}

inline bool
codepoint_isnumeric(npy_ucs4 code)
{
Expand Down Expand Up @@ -389,6 +493,90 @@ struct Buffer {
return unary_loop<IMPLEMENTED_UNARY_FUNCTIONS::ISDIGIT>();
}

/*
 * Evaluate the ISALNUM predicate for this buffer by delegating to the
 * shared `unary_loop` helper and returning its result.
 */
inline bool
isalnum()
{
    const bool verdict = unary_loop<IMPLEMENTED_UNARY_FUNCTIONS::ISALNUM>();
    return verdict;
}

inline bool
islower()
{
size_t len = num_codepoints();
if (len == 0) {
return false;
}

Buffer<enc> tmp = *this;
bool cased = 0;
for (size_t i = 0; i < len; i++) {
if (codepoint_isupper<enc>(*tmp) || codepoint_istitle<enc>(*tmp)) {
return false;
}
else if (!cased && codepoint_islower<enc>(*tmp)) {
cased = true;
}
tmp++;
}
return cased;
}

inline bool
isupper()
{
size_t len = num_codepoints();
if (len == 0) {
return false;
}

Buffer<enc> tmp = *this;
bool cased = 0;
for (size_t i = 0; i < len; i++) {
if (codepoint_islower<enc>(*tmp) || codepoint_istitle<enc>(*tmp)) {
return false;
}
else if (!cased && codepoint_isupper<enc>(*tmp)) {
cased = true;
}
tmp++;
}
return cased;
}

/*
 * True when the buffer is titlecased: every uppercase or titlecase code
 * point follows an uncased one (or starts the buffer), every lowercase
 * code point follows a cased one, and at least one cased code point is
 * present. An empty buffer yields false.
 */
inline bool
istitle()
{
    size_t len = num_codepoints();
    if (len == 0) {
        return false;
    }

    Buffer<enc> tmp = *this;
    bool cased = false;
    bool previous_is_cased = false;
    for (size_t i = 0; i < len; i++) {
        if (codepoint_isupper<enc>(*tmp) || codepoint_istitle<enc>(*tmp)) {
            /* An upper/title code point may not continue a cased run. */
            if (previous_is_cased) {
                return false;
            }
            previous_is_cased = true;
            cased = true;
        }
        else if (codepoint_islower<enc>(*tmp)) {
            /* A lowercase code point may only continue a cased run. */
            if (!previous_is_cased) {
                return false;
            }
            previous_is_cased = true;
            cased = true;
        }
        else {
            /* Uncased code point: the next cased one starts a new word. */
            previous_is_cased = false;
        }
        tmp++;
    }
    return cased;
}

inline bool
isnumeric()
{
Expand Down Expand Up @@ -464,8 +652,8 @@ struct call_buffer_member_function {
return codepoint_isdigit<enc>(*buf);
case IMPLEMENTED_UNARY_FUNCTIONS::ISSPACE:
return codepoint_isspace<enc>(*buf);
case IMPLEMENTED_UNARY_FUNCTIONS::STR_LEN:
lysnikolaou marked this conversation as resolved.
Show resolved Hide resolved
return (T)buf.num_codepoints();
case IMPLEMENTED_UNARY_FUNCTIONS::ISALNUM:
return codepoint_isalnum<enc>(*buf);
case IMPLEMENTED_UNARY_FUNCTIONS::ISNUMERIC:
return codepoint_isnumeric(*buf);
case IMPLEMENTED_UNARY_FUNCTIONS::ISDECIMAL:
Expand Down