Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Add rest of unary ufuncs for unicode/bytes dtypes #25791

Merged
merged 11 commits into from
Feb 17, 2024
Merged
15 changes: 15 additions & 0 deletions numpy/_core/code_generators/generate_umath.py
Original file line number Diff line number Diff line change
Expand Up @@ -1170,6 +1170,21 @@ def english_upper(s):
docstrings.get('numpy._core.umath.isspace'),
None,
),
'islower':
Ufunc(1, 1, False_,
docstrings.get('numpy._core.umath.islower'),
None,
),
'isupper':
Ufunc(1, 1, False_,
docstrings.get('numpy._core.umath.isupper'),
None,
),
'istitle':
Ufunc(1, 1, False_,
docstrings.get('numpy._core.umath.istitle'),
None,
),
'isdecimal':
Ufunc(1, 1, False_,
docstrings.get('numpy._core.umath.isdecimal'),
Expand Down
91 changes: 91 additions & 0 deletions numpy/_core/code_generators/ufunc_docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -4441,6 +4441,97 @@ def add_newdoc(place, name, doc):
""")

# Docstring for the ``islower`` ufunc; ``$PARAMS`` and ``$OUT_SCALAR_1`` are
# placeholders left verbatim for add_newdoc to handle.
add_newdoc('numpy._core.umath', 'islower',
    """
    Returns true for each element if all cased characters in the
    string are lowercase and there is at least one cased character,
    false otherwise.

    Parameters
    ----------
    x : array_like, with `np.bytes_` or `np.str_` dtype
    $PARAMS

    Returns
    -------
    out : ndarray
        Output array of bools
    $OUT_SCALAR_1

    See Also
    --------
    str.islower

    Examples
    --------
    >>> np.strings.islower("GHC")
    array(False)
    >>> np.strings.islower("ghc")
    array(True)

    """)

# Docstring for the ``isupper`` ufunc; ``$PARAMS`` and ``$OUT_SCALAR_1`` are
# placeholders left verbatim for add_newdoc to handle.
add_newdoc('numpy._core.umath', 'isupper',
    """
    Return true for each element if all cased characters in the
    string are uppercase and there is at least one cased character,
    false otherwise.

    Parameters
    ----------
    x : array_like, with `np.bytes_` or `np.str_` dtype
    $PARAMS

    Returns
    -------
    out : ndarray
        Output array of bools
    $OUT_SCALAR_1

    See Also
    --------
    str.isupper

    Examples
    --------
    >>> np.strings.isupper("GHC")
    array(True)
    >>> a = np.array(["hello", "HELLO", "Hello"])
    >>> np.strings.isupper(a)
    array([False,  True, False])

    """)

# Docstring for the ``istitle`` ufunc; ``$PARAMS`` and ``$OUT_SCALAR_1`` are
# placeholders left verbatim for add_newdoc to handle.
add_newdoc('numpy._core.umath', 'istitle',
    """
    Returns true for each element if the element is a titlecased
    string and there is at least one character, false otherwise.

    Parameters
    ----------
    x : array_like, with `np.bytes_` or `np.str_` dtype
    $PARAMS

    Returns
    -------
    out : ndarray
        Output array of bools
    $OUT_SCALAR_1

    See Also
    --------
    str.istitle

    Examples
    --------
    >>> np.strings.istitle("Numpy Is Great")
    array(True)
    >>> np.strings.istitle("Numpy is great")
    array(False)

    """)

add_newdoc('numpy._core.umath', 'isdecimal',
"""
For each element, return True if there are only decimal
Expand Down
162 changes: 162 additions & 0 deletions numpy/_core/src/umath/string_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ enum class IMPLEMENTED_UNARY_FUNCTIONS {
ISDECIMAL,
ISDIGIT,
ISSPACE,
ISLOWER,
ISUPPER,
ISTITLE,
ISNUMERIC,
STR_LEN,
};
Expand Down Expand Up @@ -136,6 +139,81 @@ codepoint_isspace<ENCODING::UTF8>(npy_ucs4 code)
return Py_UNICODE_ISSPACE(code);
}

/*
 * Lowercase test for one code point, selected by buffer encoding.
 * The primary template is only declared; each supported encoding is
 * provided as an explicit specialization.
 */
template <ENCODING enc>
inline bool
codepoint_islower(npy_ucs4 code);

// ASCII: narrow the code point to char and classify it with Py_ISLOWER.
template <>
inline bool
codepoint_islower<ENCODING::ASCII>(npy_ucs4 code)
{
    const char narrow = static_cast<char>(code);
    return Py_ISLOWER(narrow) != 0;
}

// UTF32: delegate to Py_UNICODE_ISLOWER.
template <>
inline bool
codepoint_islower<ENCODING::UTF32>(npy_ucs4 code)
{
    return Py_UNICODE_ISLOWER(code) != 0;
}

// UTF8: same classification as UTF32, applied to the given code point.
template <>
inline bool
codepoint_islower<ENCODING::UTF8>(npy_ucs4 code)
{
    return Py_UNICODE_ISLOWER(code) != 0;
}

/*
 * Uppercase test for one code point, selected by buffer encoding.
 * The primary template is only declared; each supported encoding is
 * provided as an explicit specialization.
 */
template <ENCODING enc>
inline bool
codepoint_isupper(npy_ucs4 code);

// ASCII: narrow the code point to char and classify it with Py_ISUPPER.
template <>
inline bool
codepoint_isupper<ENCODING::ASCII>(npy_ucs4 code)
{
    const char narrow = static_cast<char>(code);
    return Py_ISUPPER(narrow) != 0;
}

// UTF32: delegate to Py_UNICODE_ISUPPER.
template <>
inline bool
codepoint_isupper<ENCODING::UTF32>(npy_ucs4 code)
{
    return Py_UNICODE_ISUPPER(code) != 0;
}

// UTF8: same classification as UTF32, applied to the given code point.
template <>
inline bool
codepoint_isupper<ENCODING::UTF8>(npy_ucs4 code)
{
    return Py_UNICODE_ISUPPER(code) != 0;
}

/*
 * Titlecase test for one code point, selected by buffer encoding.
 * The primary template is only declared; each supported encoding is
 * provided as an explicit specialization.
 */
template <ENCODING enc>
inline bool
codepoint_istitle(npy_ucs4 code);

// ASCII: never titlecase. The parameter is left unnamed because it is not
// inspected, which avoids an unused-parameter warning.
template <>
inline bool
codepoint_istitle<ENCODING::ASCII>(npy_ucs4 /* code */)
{
    return false;
}

// UTF32: delegate to Py_UNICODE_ISTITLE.
template <>
inline bool
codepoint_istitle<ENCODING::UTF32>(npy_ucs4 code)
{
    return Py_UNICODE_ISTITLE(code);
}

// UTF8: same classification as UTF32, applied to the given code point.
template <>
inline bool
codepoint_istitle<ENCODING::UTF8>(npy_ucs4 code)
{
    return Py_UNICODE_ISTITLE(code);
}

inline bool
codepoint_isnumeric(npy_ucs4 code)
{
Expand Down Expand Up @@ -389,6 +467,84 @@ struct Buffer {
return unary_loop<IMPLEMENTED_UNARY_FUNCTIONS::ISDIGIT>();
}

inline bool
islower()
{
size_t len = num_codepoints();
if (len == 0) {
return false;
}

Buffer<enc> tmp = *this;
bool cased = 0;
for (size_t i = 0; i < len; i++) {
if (codepoint_isupper<enc>(*tmp) || codepoint_istitle<enc>(*tmp)) {
return false;
}
else if (!cased && codepoint_islower<enc>(*tmp)) {
cased = true;
}
tmp++;
}
return cased;
}

inline bool
isupper()
{
size_t len = num_codepoints();
if (len == 0) {
return false;
}

Buffer<enc> tmp = *this;
bool cased = 0;
for (size_t i = 0; i < len; i++) {
if (codepoint_islower<enc>(*tmp) || codepoint_istitle<enc>(*tmp)) {
return false;
}
else if (!cased && codepoint_isupper<enc>(*tmp)) {
cased = true;
}
tmp++;
}
return cased;
}

/*
 * True when the buffer is titlecased: every uppercase or titlecase code
 * point follows an uncased one (or starts the buffer), every lowercase
 * code point follows a cased one, and at least one cased code point is
 * present. An empty buffer yields false.
 */
inline bool
istitle()
{
    size_t len = num_codepoints();
    if (len == 0) {
        return false;
    }

    // Walk a copy so this buffer's own position is left untouched.
    Buffer<enc> tmp = *this;
    bool cased = false;
    bool previous_is_cased = false;
    for (size_t i = 0; i < len; i++) {
        if (codepoint_isupper<enc>(*tmp) || codepoint_istitle<enc>(*tmp)) {
            // An upper/titlecase code point may only begin a word.
            if (previous_is_cased) {
                return false;
            }
            previous_is_cased = true;
            cased = true;
        }
        else if (codepoint_islower<enc>(*tmp)) {
            // A lowercase code point may only continue a word.
            if (!previous_is_cased) {
                return false;
            }
            previous_is_cased = true;
            cased = true;
        }
        else {
            // Uncased code points end the current word.
            previous_is_cased = false;
        }
        tmp++;
    }
    return cased;
}

inline bool
isnumeric()
{
Expand Down Expand Up @@ -466,6 +622,12 @@ struct call_buffer_member_function {
return codepoint_isspace<enc>(*buf);
case IMPLEMENTED_UNARY_FUNCTIONS::STR_LEN:
lysnikolaou marked this conversation as resolved.
Show resolved Hide resolved
return (T)buf.num_codepoints();
case IMPLEMENTED_UNARY_FUNCTIONS::ISLOWER:
return (T)buf.islower();
case IMPLEMENTED_UNARY_FUNCTIONS::ISUPPER:
return (T)buf.isupper();
case IMPLEMENTED_UNARY_FUNCTIONS::ISTITLE:
return (T)buf.istitle();
case IMPLEMENTED_UNARY_FUNCTIONS::ISNUMERIC:
return codepoint_isnumeric(*buf);
case IMPLEMENTED_UNARY_FUNCTIONS::ISDECIMAL:
Expand Down
Loading
Loading