Skip to content

Commit

Permalink
ENH: Add islower/isupper/istitle ufuncs for unicode/bytes dtypes
Browse files Browse the repository at this point in the history
  • Loading branch information
lysnikolaou committed Feb 8, 2024
1 parent 295db0a commit fdb3062
Show file tree
Hide file tree
Showing 6 changed files with 497 additions and 107 deletions.
15 changes: 15 additions & 0 deletions numpy/_core/code_generators/generate_umath.py
Original file line number Diff line number Diff line change
Expand Up @@ -1170,6 +1170,21 @@ def english_upper(s):
docstrings.get('numpy._core.umath.isspace'),
None,
),
'islower':
Ufunc(1, 1, False_,
docstrings.get('numpy._core.umath.islower'),
None,
),
'isupper':
Ufunc(1, 1, False_,
docstrings.get('numpy._core.umath.isupper'),
None,
),
'istitle':
Ufunc(1, 1, False_,
docstrings.get('numpy._core.umath.istitle'),
None,
),
'isdecimal':
Ufunc(1, 1, False_,
docstrings.get('numpy._core.umath.isdecimal'),
Expand Down
91 changes: 91 additions & 0 deletions numpy/_core/code_generators/ufunc_docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -4441,6 +4441,97 @@ def add_newdoc(place, name, doc):
""")

# Docstring text for the `islower` entry, supplied through
# add_newdoc(place, name, doc).
# NOTE(review): `$PARAMS` and `$OUT_SCALAR_1` look like placeholders that are
# expanded elsewhere before the text is used -- confirm against add_newdoc.
add_newdoc('numpy._core.umath', 'islower',
"""
Returns true for each element if all cased characters in the
string are lowercase and there is at least one cased character,
false otherwise.
Parameters
----------
x : array_like, with `np.bytes_` or `np.str_` dtype
$PARAMS
Returns
-------
out : ndarray
Output array of bools
$OUT_SCALAR_1
See Also
--------
str.islower
Examples
--------
>>> np.strings.islower("GHC")
array(False)
>>> np.strings.islower("ghc")
array(True)
""")

# Docstring text for the `isupper` entry, supplied through
# add_newdoc(place, name, doc).
# The summary says "at least one cased character" (not merely "at least one
# character"): a string made only of uncased characters, e.g. digits, yields
# False, which matches the wording used for `islower` and by `str.isupper`.
# NOTE(review): `$PARAMS` and `$OUT_SCALAR_1` look like placeholders that are
# expanded elsewhere before the text is used -- confirm against add_newdoc.
add_newdoc('numpy._core.umath', 'isupper',
"""
Return true for each element if all cased characters in the
string are uppercase and there is at least one cased character,
false otherwise.
Parameters
----------
x : array_like, with `np.bytes_` or `np.str_` dtype
$PARAMS
Returns
-------
out : ndarray
Output array of bools
$OUT_SCALAR_1
See Also
--------
str.isupper
Examples
--------
>>> np.strings.isupper("GHC")
array(True)
>>> a = np.array(["hello", "HELLO", "Hello"])
>>> np.strings.isupper(a)
array([False, True, False])
""")

# Docstring text for the `istitle` entry, supplied through
# add_newdoc(place, name, doc).
# NOTE(review): `$PARAMS` and `$OUT_SCALAR_1` look like placeholders that are
# expanded elsewhere before the text is used -- confirm against add_newdoc.
add_newdoc('numpy._core.umath', 'istitle',
"""
Returns true for each element if the element is a titlecased
string and there is at least one character, false otherwise.
Parameters
----------
x : array_like, with `np.bytes_` or `np.str_` dtype
$PARAMS
Returns
-------
out : ndarray
Output array of bools
$OUT_SCALAR_1
See Also
--------
str.istitle
Examples
--------
>>> np.strings.istitle("Numpy Is Great")
array(True)
>>> np.strings.istitle("Numpy is great")
array(False)
""")

add_newdoc('numpy._core.umath', 'isdecimal',
"""
For each element, return True if there are only decimal
Expand Down
162 changes: 162 additions & 0 deletions numpy/_core/src/umath/string_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ enum class IMPLEMENTED_UNARY_FUNCTIONS {
ISDECIMAL,
ISDIGIT,
ISSPACE,
ISLOWER,
ISUPPER,
ISTITLE,
ISNUMERIC,
STR_LEN,
};
Expand Down Expand Up @@ -136,6 +139,81 @@ codepoint_isspace<ENCODING::UTF8>(npy_ucs4 code)
return Py_UNICODE_ISSPACE(code);
}

// Tells whether a single code point counts as lowercase for encoding `enc`.
// Only declared here; each supported encoding provides its own
// specialization below.
template<ENCODING enc>
inline bool
codepoint_islower(npy_ucs4 code);

// ASCII: the value is narrowed to `char` and classified with Py_ISLOWER.
template<>
inline bool
codepoint_islower<ENCODING::ASCII>(npy_ucs4 code)
{
    const char narrowed = static_cast<char>(code);
    return Py_ISLOWER(narrowed) != 0;
}

// UTF32: the code point is classified with Py_UNICODE_ISLOWER.
template<>
inline bool
codepoint_islower<ENCODING::UTF32>(npy_ucs4 code)
{
    return Py_UNICODE_ISLOWER(code) != 0;
}

// UTF8: receives an already-decoded code point, so the same
// Py_UNICODE_ISLOWER classification is used as for UTF32.
template<>
inline bool
codepoint_islower<ENCODING::UTF8>(npy_ucs4 code)
{
    const bool lowercase = Py_UNICODE_ISLOWER(code);
    return lowercase;
}

// Tells whether a single code point counts as uppercase for encoding `enc`.
// Only declared here; each supported encoding provides its own
// specialization below.
template<ENCODING enc>
inline bool
codepoint_isupper(npy_ucs4 code);

// ASCII: the value is narrowed to `char` and classified with Py_ISUPPER.
template<>
inline bool
codepoint_isupper<ENCODING::ASCII>(npy_ucs4 code)
{
    const char narrowed = static_cast<char>(code);
    return Py_ISUPPER(narrowed) != 0;
}

// UTF32: the code point is classified with Py_UNICODE_ISUPPER.
template<>
inline bool
codepoint_isupper<ENCODING::UTF32>(npy_ucs4 code)
{
    return Py_UNICODE_ISUPPER(code) != 0;
}

// UTF8: receives an already-decoded code point, so the same
// Py_UNICODE_ISUPPER classification is used as for UTF32.
template<>
inline bool
codepoint_isupper<ENCODING::UTF8>(npy_ucs4 code)
{
    const bool uppercase = Py_UNICODE_ISUPPER(code);
    return uppercase;
}

// Tells whether a single code point counts as titlecase for encoding `enc`.
// Only declared here; each supported encoding provides its own
// specialization below.
template<ENCODING enc>
inline bool
codepoint_istitle(npy_ucs4);

// ASCII: no value is ever reported as titlecase, so the argument is not
// inspected at all.
template<>
inline bool
codepoint_istitle<ENCODING::ASCII>(npy_ucs4 code)
{
    static_cast<void>(code);  // intentionally unused
    return false;
}

// UTF32: the code point is classified with Py_UNICODE_ISTITLE.
template<>
inline bool
codepoint_istitle<ENCODING::UTF32>(npy_ucs4 code)
{
    return Py_UNICODE_ISTITLE(code) != 0;
}

// UTF8: receives an already-decoded code point, so the same
// Py_UNICODE_ISTITLE classification is used as for UTF32.
template<>
inline bool
codepoint_istitle<ENCODING::UTF8>(npy_ucs4 code)
{
    const bool titlecase = Py_UNICODE_ISTITLE(code);
    return titlecase;
}

inline bool
codepoint_isnumeric(npy_ucs4 code)
{
Expand Down Expand Up @@ -389,6 +467,84 @@ struct Buffer {
return unary_loop<IMPLEMENTED_UNARY_FUNCTIONS::ISDIGIT>();
}

inline bool
islower()
{
size_t len = num_codepoints();
if (len == 0) {
return false;
}

Buffer<enc> tmp = *this;
bool cased = 0;
for (size_t i = 0; i < len; i++) {
if (codepoint_isupper<enc>(*tmp) || codepoint_istitle<enc>(*tmp)) {
return false;
}
else if (!cased && codepoint_islower<enc>(*tmp)) {
cased = true;
}
tmp++;
}
return cased;
}

inline bool
isupper()
{
size_t len = num_codepoints();
if (len == 0) {
return false;
}

Buffer<enc> tmp = *this;
bool cased = 0;
for (size_t i = 0; i < len; i++) {
if (codepoint_islower<enc>(*tmp) || codepoint_istitle<enc>(*tmp)) {
return false;
}
else if (!cased && codepoint_isupper<enc>(*tmp)) {
cased = true;
}
tmp++;
}
return cased;
}

inline bool
istitle()
{
size_t len = num_codepoints();
if (len == 0) {
return false;
}

Buffer<enc> tmp = *this;
bool cased = false;
bool previous_is_cased = false;
for (size_t i = 0; i < len; i++) {
if (codepoint_isupper<enc>(*tmp) || codepoint_istitle<enc>(*tmp)) {
if (previous_is_cased) {
return false;
}
previous_is_cased = true;
cased = true;
}
else if (codepoint_islower<enc>(*tmp)) {
if (!previous_is_cased) {
return false;
}
previous_is_cased = true;
cased = true;
}
else {
previous_is_cased = false;
}
tmp++;
}
return cased;
}

inline bool
isnumeric()
{
Expand Down Expand Up @@ -466,6 +622,12 @@ struct call_buffer_member_function {
return codepoint_isspace<enc>(*buf);
case IMPLEMENTED_UNARY_FUNCTIONS::STR_LEN:
return (T)buf.num_codepoints();
case IMPLEMENTED_UNARY_FUNCTIONS::ISLOWER:
return (T)buf.islower();
case IMPLEMENTED_UNARY_FUNCTIONS::ISUPPER:
return (T)buf.isupper();
case IMPLEMENTED_UNARY_FUNCTIONS::ISTITLE:
return (T)buf.istitle();
case IMPLEMENTED_UNARY_FUNCTIONS::ISNUMERIC:
return codepoint_isnumeric(*buf);
case IMPLEMENTED_UNARY_FUNCTIONS::ISDECIMAL:
Expand Down
Loading

0 comments on commit fdb3062

Please sign in to comment.