From fdb306219a69da782486b7304a5203923aa99d38 Mon Sep 17 00:00:00 2001
From: Lysandros Nikolaou <lisandrosnik@gmail.com>
Date: Thu, 8 Feb 2024 13:30:30 +0100
Subject: [PATCH 1/9] ENH: Add islower/isupper/istitle ufuncs for unicode/bytes
 dtypes

---
 numpy/_core/code_generators/generate_umath.py |  15 ++
 .../_core/code_generators/ufunc_docstrings.py |  91 ++++++++++
 numpy/_core/src/umath/string_buffer.h         | 162 ++++++++++++++++++
 numpy/_core/src/umath/string_ufuncs.cpp       | 111 ++++++++++++
 numpy/_core/strings.py                        | 114 +-----------
 numpy/_core/tests/test_strings.py             | 111 ++++++++++++
 6 files changed, 497 insertions(+), 107 deletions(-)

diff --git a/numpy/_core/code_generators/generate_umath.py b/numpy/_core/code_generators/generate_umath.py
index c4e0c0facfff..50c0ea673d37 100644
--- a/numpy/_core/code_generators/generate_umath.py
+++ b/numpy/_core/code_generators/generate_umath.py
@@ -1170,6 +1170,21 @@ def english_upper(s):
           docstrings.get('numpy._core.umath.isspace'),
           None,
           ),
+'islower':
+    Ufunc(1, 1, False_,
+          docstrings.get('numpy._core.umath.islower'),
+          None,
+          ),
+'isupper':
+    Ufunc(1, 1, False_,
+          docstrings.get('numpy._core.umath.isupper'),
+          None,
+          ),
+'istitle':
+    Ufunc(1, 1, False_,
+          docstrings.get('numpy._core.umath.istitle'),
+          None,
+          ),
 'isdecimal':
     Ufunc(1, 1, False_,
           docstrings.get('numpy._core.umath.isdecimal'),
diff --git a/numpy/_core/code_generators/ufunc_docstrings.py b/numpy/_core/code_generators/ufunc_docstrings.py
index 461549367084..a081856f44b6 100644
--- a/numpy/_core/code_generators/ufunc_docstrings.py
+++ b/numpy/_core/code_generators/ufunc_docstrings.py
@@ -4441,6 +4441,97 @@ def add_newdoc(place, name, doc):
 
     """)
 
+add_newdoc('numpy._core.umath', 'islower',
+    """
+    Returns true for each element if all cased characters in the
+    string are lowercase and there is at least one cased character,
+    false otherwise.
+
+    Parameters
+    ----------
+    x : array_like, with `np.bytes_` or `np.str_` dtype
+    $PARAMS
+
+    Returns
+    -------
+    out : ndarray
+        Output array of bools
+        $OUT_SCALAR_1
+
+    See Also
+    --------
+    str.islower
+
+    Examples
+    --------
+    >>> np.strings.islower("GHC")
+    array(False)
+    >>> np.strings.islower("ghc")
+    array(True)
+
+    """)
+
+add_newdoc('numpy._core.umath', 'isupper',
+    """
+    Return true for each element if all cased characters in the
+    string are uppercase and there is at least one cased character,
+    false otherwise.
+
+    Parameters
+    ----------
+    x : array_like, with `np.bytes_` or `np.str_` dtype
+    $PARAMS
+
+    Returns
+    -------
+    out : ndarray
+        Output array of bools
+        $OUT_SCALAR_1
+
+    See Also
+    --------
+    str.isupper
+
+    Examples
+    --------
+    >>> np.strings.isupper("GHC")
+    array(True)
+    >>> a = np.array(["hello", "HELLO", "Hello"])
+    >>> np.strings.isupper(a)
+    array([False,  True, False])
+
+    """)
+
+add_newdoc('numpy._core.umath', 'istitle',
+    """
+    Returns true for each element if the element is a titlecased
+    string and there is at least one character, false otherwise.
+
+    Parameters
+    ----------
+    x : array_like, with `np.bytes_` or `np.str_` dtype
+    $PARAMS
+
+    Returns
+    -------
+    out : ndarray
+        Output array of bools
+        $OUT_SCALAR_1
+
+    See Also
+    --------
+    str.istitle
+
+    Examples
+    --------
+    >>> np.strings.istitle("Numpy Is Great")
+    array(True)
+
+    >>> np.strings.istitle("Numpy is great")
+    array(False)
+
+    """)
+
 add_newdoc('numpy._core.umath', 'isdecimal',
     """
     For each element, return True if there are only decimal
diff --git a/numpy/_core/src/umath/string_buffer.h b/numpy/_core/src/umath/string_buffer.h
index 45139237ff5d..c3bc75e3a759 100644
--- a/numpy/_core/src/umath/string_buffer.h
+++ b/numpy/_core/src/umath/string_buffer.h
@@ -26,6 +26,9 @@ enum class IMPLEMENTED_UNARY_FUNCTIONS {
     ISDECIMAL,
     ISDIGIT,
     ISSPACE,
+    ISLOWER,
+    ISUPPER,
+    ISTITLE,
     ISNUMERIC,
     STR_LEN,
 };
@@ -136,6 +139,81 @@ codepoint_isspace<ENCODING::UTF8>(npy_ucs4 code)
     return Py_UNICODE_ISSPACE(code);
 }
 
+template<ENCODING enc>
+inline bool
+codepoint_islower(npy_ucs4 code);
+
+template<>
+inline bool
+codepoint_islower<ENCODING::ASCII>(npy_ucs4 code)
+{
+    return Py_ISLOWER((char) code);
+}
+
+template<>
+inline bool
+codepoint_islower<ENCODING::UTF32>(npy_ucs4 code)
+{
+    return Py_UNICODE_ISLOWER(code);
+}
+
+template<>
+inline bool
+codepoint_islower<ENCODING::UTF8>(npy_ucs4 code)
+{
+    return Py_UNICODE_ISLOWER(code);
+}
+
+template<ENCODING enc>
+inline bool
+codepoint_isupper(npy_ucs4 code);
+
+template<>
+inline bool
+codepoint_isupper<ENCODING::ASCII>(npy_ucs4 code)
+{
+    return Py_ISUPPER((char) code);
+}
+
+template<>
+inline bool
+codepoint_isupper<ENCODING::UTF32>(npy_ucs4 code)
+{
+    return Py_UNICODE_ISUPPER(code);
+}
+
+template<>
+inline bool
+codepoint_isupper<ENCODING::UTF8>(npy_ucs4 code)
+{
+    return Py_UNICODE_ISUPPER(code);
+}
+
+template<ENCODING enc>
+inline bool
+codepoint_istitle(npy_ucs4);
+
+template<>
+inline bool
+codepoint_istitle<ENCODING::ASCII>(npy_ucs4 code)
+{
+    return false;
+}
+
+template<>
+inline bool
+codepoint_istitle<ENCODING::UTF32>(npy_ucs4 code)
+{
+    return Py_UNICODE_ISTITLE(code);
+}
+
+template<>
+inline bool
+codepoint_istitle<ENCODING::UTF8>(npy_ucs4 code)
+{
+    return Py_UNICODE_ISTITLE(code);
+}
+
 inline bool
 codepoint_isnumeric(npy_ucs4 code)
 {
@@ -389,6 +467,84 @@ struct Buffer {
         return unary_loop<IMPLEMENTED_UNARY_FUNCTIONS::ISDIGIT>();
     }
 
+    inline bool
+    islower()
+    {
+        size_t len = num_codepoints();
+        if (len == 0) {
+            return false;
+        }
+
+        Buffer<enc> tmp = *this;
+        bool cased = 0;
+        for (size_t i = 0; i < len; i++) {
+            if (codepoint_isupper<enc>(*tmp) || codepoint_istitle<enc>(*tmp)) {
+                return false;
+            }
+            else if (!cased && codepoint_islower<enc>(*tmp)) {
+                cased = true;
+            }
+            tmp++;
+        }
+        return cased;
+    }
+
+    inline bool
+    isupper()
+    {
+        size_t len = num_codepoints();
+        if (len == 0) {
+            return false;
+        }
+
+        Buffer<enc> tmp = *this;
+        bool cased = 0;
+        for (size_t i = 0; i < len; i++) {
+            if (codepoint_islower<enc>(*tmp) || codepoint_istitle<enc>(*tmp)) {
+                return false;
+            }
+            else if (!cased && codepoint_isupper<enc>(*tmp)) {
+                cased = true;
+            }
+            tmp++;
+        }
+        return cased;
+    }
+
+    inline bool
+    istitle()
+    {
+        size_t len = num_codepoints();
+        if (len == 0) {
+            return false;
+        }
+
+        Buffer<enc> tmp = *this;
+        bool cased = false;
+        bool previous_is_cased = false;
+        for (size_t i = 0; i < len; i++) {
+            if (codepoint_isupper<enc>(*tmp) || codepoint_istitle<enc>(*tmp)) {
+                if (previous_is_cased) {
+                    return false;
+                }
+                previous_is_cased = true;
+                cased = true;
+            }
+            else if (codepoint_islower<enc>(*tmp)) {
+                if (!previous_is_cased) {
+                    return false;
+                }
+                previous_is_cased = true;
+                cased = true;
+            }
+            else {
+                previous_is_cased = false;
+            }
+            tmp++;
+        }
+        return cased;
+    }
+
     inline bool
     isnumeric()
     {
@@ -466,6 +622,12 @@ struct call_buffer_member_function {
                 return codepoint_isspace<enc>(*buf);
             case IMPLEMENTED_UNARY_FUNCTIONS::STR_LEN:
                 return (T)buf.num_codepoints();
+            case IMPLEMENTED_UNARY_FUNCTIONS::ISLOWER:
+                return (T)buf.islower();
+            case IMPLEMENTED_UNARY_FUNCTIONS::ISUPPER:
+                return (T)buf.isupper();
+            case IMPLEMENTED_UNARY_FUNCTIONS::ISTITLE:
+                return (T)buf.istitle();
             case IMPLEMENTED_UNARY_FUNCTIONS::ISNUMERIC:
                 return codepoint_isnumeric(*buf);
             case IMPLEMENTED_UNARY_FUNCTIONS::ISDECIMAL:
diff --git a/numpy/_core/src/umath/string_ufuncs.cpp b/numpy/_core/src/umath/string_ufuncs.cpp
index a9ebd929e4a2..07fe1b81fd2d 100644
--- a/numpy/_core/src/umath/string_ufuncs.cpp
+++ b/numpy/_core/src/umath/string_ufuncs.cpp
@@ -283,6 +283,84 @@ string_isspace_loop(PyArrayMethod_Context *context,
 }
 
 
+template <ENCODING enc>
+static int
+string_islower_loop(PyArrayMethod_Context *context,
+        char *const data[], npy_intp const dimensions[],
+        npy_intp const strides[], NpyAuxData *NPY_UNUSED(auxdata))
+{
+    int elsize = context->descriptors[0]->elsize;
+
+    char *in = data[0];
+    char *out = data[1];
+
+    npy_intp N = dimensions[0];
+
+    while (N--) {
+        Buffer<enc> buf(in, elsize);
+        npy_bool res = buf.islower();
+        *(npy_bool *)out = res;
+
+        in += strides[0];
+        out += strides[1];
+    }
+
+    return 0;
+}
+
+
+template <ENCODING enc>
+static int
+string_isupper_loop(PyArrayMethod_Context *context,
+        char *const data[], npy_intp const dimensions[],
+        npy_intp const strides[], NpyAuxData *NPY_UNUSED(auxdata))
+{
+    int elsize = context->descriptors[0]->elsize;
+
+    char *in = data[0];
+    char *out = data[1];
+
+    npy_intp N = dimensions[0];
+
+    while (N--) {
+        Buffer<enc> buf(in, elsize);
+        npy_bool res = buf.isupper();
+        *(npy_bool *)out = res;
+
+        in += strides[0];
+        out += strides[1];
+    }
+
+    return 0;
+}
+
+
+template <ENCODING enc>
+static int
+string_istitle_loop(PyArrayMethod_Context *context,
+        char *const data[], npy_intp const dimensions[],
+        npy_intp const strides[], NpyAuxData *NPY_UNUSED(auxdata))
+{
+    int elsize = context->descriptors[0]->elsize;
+
+    char *in = data[0];
+    char *out = data[1];
+
+    npy_intp N = dimensions[0];
+
+    while (N--) {
+        Buffer<enc> buf(in, elsize);
+        npy_bool res = buf.istitle();
+        *(npy_bool *)out = res;
+
+        in += strides[0];
+        out += strides[1];
+    }
+
+    return 0;
+}
+
+
 static int
 string_isdecimal_loop(PyArrayMethod_Context *context,
         char *const data[], npy_intp const dimensions[],
@@ -1255,6 +1333,39 @@ init_string_ufuncs(PyObject *umath)
         return -1;
     }
 
+    if (init_ufunc<ENCODING::ASCII>(
+            umath, "islower", "templated_string_islower", 1, 1, dtypes,
+            string_islower_loop<ENCODING::ASCII>, NULL) < 0) {
+        return -1;
+    }
+    if (init_ufunc<ENCODING::UTF32>(
+            umath, "islower", "templated_string_islower", 1, 1, dtypes,
+            string_islower_loop<ENCODING::UTF32>, NULL) < 0) {
+        return -1;
+    }
+
+    if (init_ufunc<ENCODING::ASCII>(
+            umath, "isupper", "templated_string_isupper", 1, 1, dtypes,
+            string_isupper_loop<ENCODING::ASCII>, NULL) < 0) {
+        return -1;
+    }
+    if (init_ufunc<ENCODING::UTF32>(
+            umath, "isupper", "templated_string_isupper", 1, 1, dtypes,
+            string_isupper_loop<ENCODING::UTF32>, NULL) < 0) {
+        return -1;
+    }
+
+    if (init_ufunc<ENCODING::ASCII>(
+            umath, "istitle", "templated_string_istitle", 1, 1, dtypes,
+            string_istitle_loop<ENCODING::ASCII>, NULL) < 0) {
+        return -1;
+    }
+    if (init_ufunc<ENCODING::UTF32>(
+            umath, "istitle", "templated_string_istitle", 1, 1, dtypes,
+            string_istitle_loop<ENCODING::UTF32>, NULL) < 0) {
+        return -1;
+    }
+
     if (init_ufunc<ENCODING::UTF32>(
             umath, "isdecimal", "templated_string_isdecimal", 1, 1, dtypes,
             string_isdecimal_loop, NULL) < 0) {
diff --git a/numpy/_core/strings.py b/numpy/_core/strings.py
index 31ed004d78c7..6aa3da77f90a 100644
--- a/numpy/_core/strings.py
+++ b/numpy/_core/strings.py
@@ -13,6 +13,9 @@
     isalpha,
     isdigit,
     isspace,
+    islower,
+    isupper,
+    istitle,
     isdecimal,
     isnumeric,
     str_len,
@@ -34,12 +37,12 @@
 __all__ = [
     # UFuncs
     "equal", "not_equal", "less", "less_equal", "greater", "greater_equal",
-    "add", "isalpha", "isdigit", "isspace", "isdecimal", "isnumeric",
-    "str_len", "find", "rfind", "count", "startswith", "endswith",
-    "lstrip", "rstrip", "strip", "replace",
+    "add", "isalpha", "isdigit", "isspace", "islower", "isupper", "istitle",
+    "isdecimal", "isnumeric", "str_len", "find", "rfind", "count",
+    "startswith", "endswith", "lstrip", "rstrip", "strip", "replace",
 
     # _vec_string - Will gradually become ufuncs as well
-    "isalnum", "islower", "istitle", "isupper", "multiply", "mod", "index",
+    "isalnum", "multiply", "mod", "index",
     "rindex", "decode", "encode", "expandtabs", "center",
     "ljust", "rjust", "zfill", "upper", "lower", "swapcase", "capitalize",
     "title", "join", "split", "rsplit", "splitlines",
@@ -125,109 +128,6 @@ def isalnum(a):
     return _vec_string(a, np.bool, 'isalnum')
 
 
-def islower(a):
-    """
-    Returns true for each element if all cased characters in the
-    string are lowercase and there is at least one cased character,
-    false otherwise.
-
-    Calls :meth:`str.islower` element-wise.
-
-    For 8-bit strings, this method is locale-dependent.
-
-    Parameters
-    ----------
-    a : array_like, with `np.bytes_` or `np.str_` dtype
-
-    Returns
-    -------
-    out : ndarray
-        Output array of bools
-
-    See Also
-    --------
-    str.islower
-
-    Examples
-    --------
-    >>> np.strings.islower("GHC")
-    array(False)
-    >>> np.strings.islower("ghc")
-    array(True)
-
-    """
-    return _vec_string(a, np.bool, 'islower')
-
-
-def istitle(a):
-    """
-    Returns true for each element if the element is a titlecased
-    string and there is at least one character, false otherwise.
-
-    Call :meth:`str.istitle` element-wise.
-
-    For 8-bit strings, this method is locale-dependent.
-
-    Parameters
-    ----------
-    a : array_like, with `np.bytes_` or `np.str_` dtype
-
-    Returns
-    -------
-    out : ndarray
-        Output array of bools
-
-    See Also
-    --------
-    str.istitle
-
-    Examples
-    --------
-    >>> np.strings.istitle("Numpy Is Great")
-    array(True)
-
-    >>> np.strings.istitle("Numpy is great")
-    array(False)
-    
-    """
-    return _vec_string(a, np.bool, 'istitle')
-
-
-def isupper(a):
-    """
-    Return true for each element if all cased characters in the
-    string are uppercase and there is at least one character, false
-    otherwise.
-
-    Call :meth:`str.isupper` element-wise.
-
-    For 8-bit strings, this method is locale-dependent.
-
-    Parameters
-    ----------
-    a : array_like, with `np.bytes_` or `np.str_` dtype
-
-    Returns
-    -------
-    out : ndarray
-        Output array of bools
-
-    See Also
-    --------
-    str.isupper
-
-    Examples
-    --------
-    >>> np.strings.isupper("GHC")
-    array(True)     
-    >>> a = np.array(["hello", "HELLO", "Hello"])
-    >>> np.strings.isupper(a)
-    array([False,  True, False]) 
-
-    """
-    return _vec_string(a, np.bool, 'isupper')
-
-
 def multiply(a, i):
     """
     Return (a * i), that is string multiple concatenation,
diff --git a/numpy/_core/tests/test_strings.py b/numpy/_core/tests/test_strings.py
index f6f7a0ced0dd..efe24658025d 100644
--- a/numpy/_core/tests/test_strings.py
+++ b/numpy/_core/tests/test_strings.py
@@ -167,6 +167,61 @@ def test_isspace(self, in_, out, dt):
         in_ = np.array(in_, dtype=dt)
         assert_array_equal(np.strings.isspace(in_), out)
 
+    @pytest.mark.parametrize("in_,out", [
+        ('', False),
+        ('a', True),
+        ('A', False),
+        ('\n', False),
+        ('abc', True),
+        ('aBc', False),
+        ('abc\n', True),
+    ])
+    def test_islower(self, in_, out, dt):
+        # TODO: Remove this
+        if dt == "T":
+            pytest.xfail(
+                "StringDType support to be added in a follow-up commit")
+        in_ = np.array(in_, dtype=dt)
+        assert_array_equal(np.strings.islower(in_), out)
+
+    @pytest.mark.parametrize("in_,out", [
+        ('', False),
+        ('a', False),
+        ('A', True),
+        ('\n', False),
+        ('ABC', True),
+        ('AbC', False),
+        ('ABC\n', True),
+    ])
+    def test_isupper(self, in_, out, dt):
+        # TODO: Remove this
+        if dt == "T":
+            pytest.xfail(
+                "StringDType support to be added in a follow-up commit")
+        in_ = np.array(in_, dtype=dt)
+        assert_array_equal(np.strings.isupper(in_), out)
+
+    @pytest.mark.parametrize("in_,out", [
+        ('', False),
+        ('a', False),
+        ('A', True),
+        ('\n', False),
+        ('A Titlecased Line', True),
+        ('A\nTitlecased Line', True),
+        ('A Titlecased, Line', True),
+        ('Not a capitalized String', False),
+        ('Not\ta Titlecase String', False),
+        ('Not--a Titlecase String', False),
+        ('NOT', False),
+    ])
+    def test_istitle(self, in_, out, dt):
+        # TODO: Remove this
+        if dt == "T":
+            pytest.xfail(
+                "StringDType support to be added in a follow-up commit")
+        in_ = np.array(in_, dtype=dt)
+        assert_array_equal(np.strings.istitle(in_), out)
+
     @pytest.mark.parametrize("in_,out", [
         ("", 0),
         ("abc", 3),
@@ -617,6 +672,62 @@ def test_replace_unicode(self, dt):
         assert_array_equal(np.strings.replace(buf,  "<", "&lt;", MAX),
                            "...\u043c......&lt;")
 
+    @pytest.mark.parametrize("dt", ["U", "T"])
+    @pytest.mark.parametrize("in_,out", [
+        ('\u1FFc', False),
+        ('\u2167', False),
+        ('\U00010401', False),
+        ('\U00010427', False),
+        ('\U0001F40D', False),
+        ('\U0001F46F', False),
+        ('\u2177', True),
+        ('\U00010429', True),
+        ('\U0001044E', True),
+    ])
+    def test_islower_unicode(self, in_, out, dt):
+        # TODO: Remove this
+        if dt == "T":
+            pytest.xfail(
+                "StringDType support to be added in a follow-up commit")
+        assert_array_equal(np.strings.islower(in_), out)
+
+    @pytest.mark.parametrize("dt", ["U", "T"])
+    @pytest.mark.parametrize("in_,out", [
+        ('\u1FFc', False),
+        ('\u2167', True),
+        ('\U00010401', True),
+        ('\U00010427', True),
+        ('\U0001F40D', False),
+        ('\U0001F46F', False),
+        ('\u2177', False),
+        ('\U00010429', False),
+        ('\U0001044E', False),
+    ])
+    def test_isupper_unicode(self, in_, out, dt):
+        # TODO: Remove this
+        if dt == "T":
+            pytest.xfail(
+                "StringDType support to be added in a follow-up commit")
+        assert_array_equal(np.strings.isupper(in_), out)
+
+    @pytest.mark.parametrize("dt", ["U", "T"])
+    @pytest.mark.parametrize("in_,out", [
+        ('\u1FFc', True),
+        ('Greek \u1FFcitlecases ...', True),
+        ('\U00010401\U00010429', True),
+        ('\U00010427\U0001044E', True),
+        ('\U00010429', False),
+        ('\U0001044E', False),
+        ('\U0001F40D', False),
+        ('\U0001F46F', False),
+    ])
+    def test_istitle_unicode(self, in_, out, dt):
+        # TODO: Remove this
+        if dt == "T":
+            pytest.xfail(
+                "StringDType support to be added in a follow-up commit")
+        assert_array_equal(np.strings.istitle(in_), out)
+
 
 def check_itemsize(n_elem, dt):
     if dt == "T":

From 4c440cacceaa426afb15d7c5f292828c5a005a38 Mon Sep 17 00:00:00 2001
From: Lysandros Nikolaou <lisandrosnik@gmail.com>
Date: Tue, 13 Feb 2024 11:45:20 +0100
Subject: [PATCH 2/9] Use unary loop with templating instead of different loops
 for each ufunc

---
 numpy/_core/src/umath/string_buffer.h   |   8 -
 numpy/_core/src/umath/string_ufuncs.cpp | 381 ++++++------------------
 2 files changed, 95 insertions(+), 294 deletions(-)

diff --git a/numpy/_core/src/umath/string_buffer.h b/numpy/_core/src/umath/string_buffer.h
index c3bc75e3a759..b9f460f3d0b8 100644
--- a/numpy/_core/src/umath/string_buffer.h
+++ b/numpy/_core/src/umath/string_buffer.h
@@ -620,14 +620,6 @@ struct call_buffer_member_function {
                 return codepoint_isdigit<enc>(*buf);
             case IMPLEMENTED_UNARY_FUNCTIONS::ISSPACE:
                 return codepoint_isspace<enc>(*buf);
-            case IMPLEMENTED_UNARY_FUNCTIONS::STR_LEN:
-                return (T)buf.num_codepoints();
-            case IMPLEMENTED_UNARY_FUNCTIONS::ISLOWER:
-                return (T)buf.islower();
-            case IMPLEMENTED_UNARY_FUNCTIONS::ISUPPER:
-                return (T)buf.isupper();
-            case IMPLEMENTED_UNARY_FUNCTIONS::ISTITLE:
-                return (T)buf.istitle();
             case IMPLEMENTED_UNARY_FUNCTIONS::ISNUMERIC:
                 return codepoint_isnumeric(*buf);
             case IMPLEMENTED_UNARY_FUNCTIONS::ISDECIMAL:
diff --git a/numpy/_core/src/umath/string_ufuncs.cpp b/numpy/_core/src/umath/string_ufuncs.cpp
index 07fe1b81fd2d..219d22e253a9 100644
--- a/numpy/_core/src/umath/string_ufuncs.cpp
+++ b/numpy/_core/src/umath/string_ufuncs.cpp
@@ -21,58 +21,6 @@
 #include "string_buffer.h"
 
 
-template <ENCODING enc>
-static inline void
-string_add(Buffer<enc> buf1, Buffer<enc> buf2, Buffer<enc> out)
-{
-    size_t len1 = buf1.num_codepoints();
-    size_t len2 = buf2.num_codepoints();
-    buf1.buffer_memcpy(out, len1);
-    buf2.buffer_memcpy(out + len1, len2);
-    out.buffer_fill_with_zeros_after_index(len1 + len2);
-}
-
-
-static inline npy_bool
-string_isdecimal(Buffer<ENCODING::UTF32> buf)
-{
-    size_t len = buf.num_codepoints();
-
-    if (len == 0) {
-        return (npy_bool) 0;
-    }
-
-    for (size_t i = 0; i < len; i++) {
-        npy_bool isdecimal = (npy_bool) Py_UNICODE_ISDECIMAL(*buf);
-        if (!isdecimal) {
-            return isdecimal;
-        }
-        buf++;
-    }
-    return (npy_bool) 1;
-}
-
-
-static inline npy_bool
-string_isnumeric(Buffer<ENCODING::UTF32> buf)
-{
-    size_t len = buf.num_codepoints();
-
-    if (len == 0) {
-        return (npy_bool) 0;
-    }
-
-    for (size_t i = 0; i < len; i++) {
-        npy_bool isnumeric = (npy_bool) Py_UNICODE_ISNUMERIC(*buf);
-        if (!isnumeric) {
-            return isnumeric;
-        }
-        buf++;
-    }
-    return (npy_bool) 1;
-}
-
-
 /*
  * Helper for templating, avoids warnings about uncovered switch paths.
  */
@@ -151,143 +99,48 @@ string_comparison_loop(PyArrayMethod_Context *context,
 }
 
 
-template <ENCODING enc>
-static int
-string_add_loop(PyArrayMethod_Context *context,
-                char *const data[], npy_intp const dimensions[],
-                npy_intp const strides[], NpyAuxData *NPY_UNUSED(auxdata))
-{
-    int elsize1 = context->descriptors[0]->elsize;
-    int elsize2 = context->descriptors[1]->elsize;
-    int outsize = context->descriptors[2]->elsize;
-
-    char *in1 = data[0];
-    char *in2 = data[1];
-    char *out = data[2];
-
-    npy_intp N = dimensions[0];
-
-    while (N--) {
-        Buffer<enc> buf1(in1, elsize1);
-        Buffer<enc> buf2(in2, elsize2);
-        Buffer<enc> outbuf(out, outsize);
-        string_add<enc>(buf1, buf2, outbuf);
-
-        in1 += strides[0];
-        in2 += strides[1];
-        out += strides[2];
-    }
-
-    return 0;
-}
-
-
-template <ENCODING enc>
-static int
-string_len_loop(PyArrayMethod_Context *context,
-        char *const data[], npy_intp const dimensions[],
-        npy_intp const strides[], NpyAuxData *NPY_UNUSED(auxdata))
-{
-    int elsize = context->descriptors[0]->elsize;
-
-    char *in = data[0];
-    char *out = data[1];
-
-    npy_intp N = dimensions[0];
-
-    while (N--) {
-        Buffer<enc> buf(in, elsize);
-        *(npy_intp *)out = buf.num_codepoints();
-
-        in += strides[0];
-        out += strides[1];
-    }
-
-    return 0;
-}
-
-
-template <ENCODING enc>
-static int
-string_isalpha_loop(PyArrayMethod_Context *context,
-        char *const data[], npy_intp const dimensions[],
-        npy_intp const strides[], NpyAuxData *NPY_UNUSED(auxdata))
-{
-    PyArray_Descr *descr = context->descriptors[0];
-    int elsize = descr->elsize;
-
-    char *in = data[0];
-    char *out = data[1];
-
-    npy_intp N = dimensions[0];
-
-    while (N--) {
-        Buffer<enc> buf(in, elsize);
-        *(npy_bool *)out = (npy_bool) buf.isalpha();
-
-        in += strides[0];
-        out += strides[1];
-    }
-
-    return 0;
-}
-
-
-template <ENCODING enc>
-static int
-string_isdigit_loop(PyArrayMethod_Context *context,
-        char *const data[], npy_intp const dimensions[],
-        npy_intp const strides[], NpyAuxData *NPY_UNUSED(auxdata))
-{
-    int elsize = context->descriptors[0]->elsize;
-
-    char *in = data[0];
-    char *out = data[1];
-
-    npy_intp N = dimensions[0];
-
-    while (N--) {
-        Buffer<enc> buf(in, elsize);
-        *(npy_bool *)out = (npy_bool) buf.isdigit();
-
-        in += strides[0];
-        out += strides[1];
-    }
-
-    return 0;
-}
-
-
-template <ENCODING enc>
-static int
-string_isspace_loop(PyArrayMethod_Context *context,
-        char *const data[], npy_intp const dimensions[],
-        npy_intp const strides[], NpyAuxData *NPY_UNUSED(auxdata))
-{
-    int elsize = context->descriptors[0]->elsize;
-
-    char *in = data[0];
-    char *out = data[1];
-
-    npy_intp N = dimensions[0];
-
-    while (N--) {
-        Buffer<enc> buf(in, elsize);
-        *(npy_bool *)out = (npy_bool) buf.isspace();
-
-        in += strides[0];
-        out += strides[1];
+template <ENCODING enc, IMPLEMENTED_UNARY_FUNCTIONS f, typename T>
+struct call_buffer_unary_function {
+    void operator()(const char *buffer, size_t size, char *out) {
+        Buffer<enc> buf((char *)buffer, size);
+        switch (f) {
+            case IMPLEMENTED_UNARY_FUNCTIONS::ISALPHA:
+                *(T *)out = buf.isalpha();
+                break;
+            case IMPLEMENTED_UNARY_FUNCTIONS::ISDECIMAL:
+                *(T *)out = buf.isdecimal();
+                break;
+            case IMPLEMENTED_UNARY_FUNCTIONS::ISDIGIT:
+                *(T *)out = buf.isdigit();
+                break;
+            case IMPLEMENTED_UNARY_FUNCTIONS::ISNUMERIC:
+                *(T *)out = buf.isnumeric();
+                break;
+            case IMPLEMENTED_UNARY_FUNCTIONS::ISSPACE:
+                *(T *)out = buf.isspace();
+                break;
+            case IMPLEMENTED_UNARY_FUNCTIONS::ISLOWER:
+                *(T *)out = buf.islower();
+                break;
+            case IMPLEMENTED_UNARY_FUNCTIONS::ISUPPER:
+                *(T *)out = buf.isupper();
+                break;
+            case IMPLEMENTED_UNARY_FUNCTIONS::ISTITLE:
+                *(T *)out = buf.istitle();
+                break;
+            case IMPLEMENTED_UNARY_FUNCTIONS::STR_LEN:
+                *(T *)out = buf.num_codepoints();
+                break;
+        }
     }
-
-    return 0;
-}
+};
 
 
-template <ENCODING enc>
+template <ENCODING enc, IMPLEMENTED_UNARY_FUNCTIONS f, typename return_value>
 static int
-string_islower_loop(PyArrayMethod_Context *context,
-        char *const data[], npy_intp const dimensions[],
-        npy_intp const strides[], NpyAuxData *NPY_UNUSED(auxdata))
+string_unary_loop(PyArrayMethod_Context *context,
+                  char *const data[], npy_intp const dimensions[],
+                  npy_intp const strides[], NpyAuxData *NPY_UNUSED(auxdata))
 {
     int elsize = context->descriptors[0]->elsize;
 
@@ -297,9 +150,8 @@ string_islower_loop(PyArrayMethod_Context *context,
     npy_intp N = dimensions[0];
 
     while (N--) {
-        Buffer<enc> buf(in, elsize);
-        npy_bool res = buf.islower();
-        *(npy_bool *)out = res;
+        call_buffer_unary_function<enc, f, return_value> cbuf;
+        cbuf(in, (size_t) elsize, out);
 
         in += strides[0];
         out += strides[1];
@@ -310,101 +162,42 @@ string_islower_loop(PyArrayMethod_Context *context,
 
 
 template <ENCODING enc>
-static int
-string_isupper_loop(PyArrayMethod_Context *context,
-        char *const data[], npy_intp const dimensions[],
-        npy_intp const strides[], NpyAuxData *NPY_UNUSED(auxdata))
+static inline void
+string_add(Buffer<enc> buf1, Buffer<enc> buf2, Buffer<enc> out)
 {
-    int elsize = context->descriptors[0]->elsize;
-
-    char *in = data[0];
-    char *out = data[1];
-
-    npy_intp N = dimensions[0];
-
-    while (N--) {
-        Buffer<enc> buf(in, elsize);
-        npy_bool res = buf.isupper();
-        *(npy_bool *)out = res;
-
-        in += strides[0];
-        out += strides[1];
-    }
-
-    return 0;
+    size_t len1 = buf1.num_codepoints();
+    size_t len2 = buf2.num_codepoints();
+    buf1.buffer_memcpy(out, len1);
+    buf2.buffer_memcpy(out + len1, len2);
+    out.buffer_fill_with_zeros_after_index(len1 + len2);
 }
 
 
 template <ENCODING enc>
 static int
-string_istitle_loop(PyArrayMethod_Context *context,
-        char *const data[], npy_intp const dimensions[],
-        npy_intp const strides[], NpyAuxData *NPY_UNUSED(auxdata))
-{
-    int elsize = context->descriptors[0]->elsize;
-
-    char *in = data[0];
-    char *out = data[1];
-
-    npy_intp N = dimensions[0];
-
-    while (N--) {
-        Buffer<enc> buf(in, elsize);
-        npy_bool res = buf.istitle();
-        *(npy_bool *)out = res;
-
-        in += strides[0];
-        out += strides[1];
-    }
-
-    return 0;
-}
-
-
-static int
-string_isdecimal_loop(PyArrayMethod_Context *context,
-        char *const data[], npy_intp const dimensions[],
-        npy_intp const strides[], NpyAuxData *NPY_UNUSED(auxdata))
-{
-    int elsize = context->descriptors[0]->elsize;
-
-    char *in = data[0];
-    char *out = data[1];
-
-    npy_intp N = dimensions[0];
-
-    while (N--) {
-        Buffer<ENCODING::UTF32> buf(in, elsize);
-        npy_bool res = string_isdecimal(buf);
-        *(npy_bool *)out = res;
-
-        in += strides[0];
-        out += strides[1];
-    }
-
-    return 0;
-}
-
-
-static int
-string_isnumeric_loop(PyArrayMethod_Context *context,
-        char *const data[], npy_intp const dimensions[],
-        npy_intp const strides[], NpyAuxData *NPY_UNUSED(auxdata))
+string_add_loop(PyArrayMethod_Context *context,
+                char *const data[], npy_intp const dimensions[],
+                npy_intp const strides[], NpyAuxData *NPY_UNUSED(auxdata))
 {
-    int elsize = context->descriptors[0]->elsize;
+    int elsize1 = context->descriptors[0]->elsize;
+    int elsize2 = context->descriptors[1]->elsize;
+    int outsize = context->descriptors[2]->elsize;
 
-    char *in = data[0];
-    char *out = data[1];
+    char *in1 = data[0];
+    char *in2 = data[1];
+    char *out = data[2];
 
     npy_intp N = dimensions[0];
 
     while (N--) {
-        Buffer<ENCODING::UTF32> buf(in, elsize);
-        npy_bool res = string_isnumeric(buf);
-        *(npy_bool *)out = res;
+        Buffer<enc> buf1(in1, elsize1);
+        Buffer<enc> buf2(in2, elsize2);
+        Buffer<enc> outbuf(out, outsize);
+        string_add<enc>(buf1, buf2, outbuf);
 
-        in += strides[0];
-        out += strides[1];
+        in1 += strides[0];
+        in2 += strides[1];
+        out += strides[2];
     }
 
     return 0;
@@ -1195,12 +988,14 @@ init_string_ufuncs(PyObject *umath)
     dtypes[1] = NPY_DEFAULT_INT;
     if (init_ufunc<ENCODING::ASCII>(
             umath, "str_len", "templated_string_len", 1, 1, dtypes,
-            string_len_loop<ENCODING::ASCII>, NULL) < 0) {
+            string_unary_loop<ENCODING::ASCII, IMPLEMENTED_UNARY_FUNCTIONS::STR_LEN, npy_intp>,
+            NULL) < 0) {
         return -1;
     }
     if (init_ufunc<ENCODING::UTF32>(
             umath, "str_len", "templated_string_len", 1, 1, dtypes,
-            string_len_loop<ENCODING::UTF32>, NULL) < 0) {
+            string_unary_loop<ENCODING::UTF32, IMPLEMENTED_UNARY_FUNCTIONS::STR_LEN, npy_intp>,
+            NULL) < 0) {
         return -1;
     }
 
@@ -1208,12 +1003,14 @@ init_string_ufuncs(PyObject *umath)
     dtypes[1] = NPY_BOOL;
     if (init_ufunc<ENCODING::ASCII>(
             umath, "isalpha", "templated_string_isalpha", 1, 1, dtypes,
-            string_isalpha_loop<ENCODING::ASCII>, NULL) < 0) {
+            string_unary_loop<ENCODING::ASCII, IMPLEMENTED_UNARY_FUNCTIONS::ISALPHA, npy_bool>,
+            NULL) < 0) {
         return -1;
     }
     if (init_ufunc<ENCODING::UTF32>(
             umath, "isalpha", "templated_string_isalpha", 1, 1, dtypes,
-            string_isalpha_loop<ENCODING::UTF32>, NULL) < 0) {
+            string_unary_loop<ENCODING::UTF32, IMPLEMENTED_UNARY_FUNCTIONS::ISALPHA, npy_bool>,
+            NULL) < 0) {
         return -1;
     }
 
@@ -1313,68 +1110,80 @@ init_string_ufuncs(PyObject *umath)
     dtypes[1] = NPY_BOOL;
     if (init_ufunc<ENCODING::ASCII>(
             umath, "isdigit", "templated_string_isdigit", 1, 1, dtypes,
-            string_isdigit_loop<ENCODING::ASCII>, NULL) < 0) {
+            string_unary_loop<ENCODING::ASCII, IMPLEMENTED_UNARY_FUNCTIONS::ISDIGIT, npy_bool>,
+            NULL) < 0) {
         return -1;
     }
     if (init_ufunc<ENCODING::UTF32>(
             umath, "isdigit", "templated_string_isdigit", 1, 1, dtypes,
-            string_isdigit_loop<ENCODING::UTF32>, NULL) < 0) {
+            string_unary_loop<ENCODING::UTF32, IMPLEMENTED_UNARY_FUNCTIONS::ISDIGIT, npy_bool>,
+            NULL) < 0) {
         return -1;
     }
 
     if (init_ufunc<ENCODING::ASCII>(
             umath, "isspace", "templated_string_isspace", 1, 1, dtypes,
-            string_isspace_loop<ENCODING::ASCII>, NULL) < 0) {
+            string_unary_loop<ENCODING::ASCII, IMPLEMENTED_UNARY_FUNCTIONS::ISSPACE, npy_bool>,
+            NULL) < 0) {
         return -1;
     }
     if (init_ufunc<ENCODING::UTF32>(
             umath, "isspace", "templated_string_isspace", 1, 1, dtypes,
-            string_isspace_loop<ENCODING::UTF32>, NULL) < 0) {
+            string_unary_loop<ENCODING::UTF32, IMPLEMENTED_UNARY_FUNCTIONS::ISSPACE, npy_bool>,
+            NULL) < 0) {
         return -1;
     }
 
     if (init_ufunc<ENCODING::ASCII>(
             umath, "islower", "templated_string_islower", 1, 1, dtypes,
-            string_islower_loop<ENCODING::ASCII>, NULL) < 0) {
+            string_unary_loop<ENCODING::ASCII, IMPLEMENTED_UNARY_FUNCTIONS::ISLOWER, npy_bool>,
+            NULL) < 0) {
         return -1;
     }
     if (init_ufunc<ENCODING::UTF32>(
             umath, "islower", "templated_string_islower", 1, 1, dtypes,
-            string_islower_loop<ENCODING::UTF32>, NULL) < 0) {
+            string_unary_loop<ENCODING::UTF32, IMPLEMENTED_UNARY_FUNCTIONS::ISLOWER, npy_bool>,
+            NULL) < 0) {
         return -1;
     }
 
     if (init_ufunc<ENCODING::ASCII>(
             umath, "isupper", "templated_string_isupper", 1, 1, dtypes,
-            string_isupper_loop<ENCODING::ASCII>, NULL) < 0) {
+            string_unary_loop<ENCODING::ASCII, IMPLEMENTED_UNARY_FUNCTIONS::ISUPPER, npy_bool>,
+            NULL) < 0) {
         return -1;
     }
     if (init_ufunc<ENCODING::UTF32>(
             umath, "isupper", "templated_string_isupper", 1, 1, dtypes,
-            string_isupper_loop<ENCODING::UTF32>, NULL) < 0) {
+            string_unary_loop<ENCODING::UTF32, IMPLEMENTED_UNARY_FUNCTIONS::ISUPPER, npy_bool>,
+            NULL) < 0) {
         return -1;
     }
 
     if (init_ufunc<ENCODING::ASCII>(
             umath, "istitle", "templated_string_istitle", 1, 1, dtypes,
-            string_istitle_loop<ENCODING::ASCII>, NULL) < 0) {
+            string_unary_loop<ENCODING::ASCII, IMPLEMENTED_UNARY_FUNCTIONS::ISTITLE, npy_bool>,
+            NULL) < 0) {
         return -1;
     }
     if (init_ufunc<ENCODING::UTF32>(
             umath, "istitle", "templated_string_istitle", 1, 1, dtypes,
-            string_istitle_loop<ENCODING::UTF32>, NULL) < 0) {
+            string_unary_loop<ENCODING::UTF32, IMPLEMENTED_UNARY_FUNCTIONS::ISTITLE, npy_bool>,
+            NULL) < 0) {
         return -1;
     }
 
     if (init_ufunc<ENCODING::UTF32>(
             umath, "isdecimal", "templated_string_isdecimal", 1, 1, dtypes,
-            string_isdecimal_loop, NULL) < 0) {
+            string_unary_loop<ENCODING::UTF32, IMPLEMENTED_UNARY_FUNCTIONS::ISDECIMAL, npy_bool>,
+            NULL) < 0) {
         return -1;
     }
 
     if (init_ufunc<ENCODING::UTF32>(
             umath, "isnumeric", "templated_string_isnumeric", 1, 1, dtypes,
-            string_isnumeric_loop, NULL) < 0) {
+            string_unary_loop<ENCODING::UTF32, IMPLEMENTED_UNARY_FUNCTIONS::ISNUMERIC, npy_bool>,
+            NULL) < 0) {
         return -1;
     }
 

From b9db87c65d8d05daba592a81c7dd59891e66507b Mon Sep 17 00:00:00 2001
From: Lysandros Nikolaou <lisandrosnik@gmail.com>
Date: Tue, 13 Feb 2024 12:53:00 +0100
Subject: [PATCH 3/9] Add isalnum ufuncs for str and bytes

---
 numpy/_core/code_generators/generate_umath.py |  5 +++
 .../_core/code_generators/ufunc_docstrings.py | 28 +++++++++++++
 numpy/_core/src/common/numpyos.c              |  2 +-
 numpy/_core/src/common/numpyos.h              |  3 ++
 numpy/_core/src/umath/string_buffer.h         | 34 ++++++++++++++++
 numpy/_core/src/umath/string_ufuncs.cpp       | 16 ++++++++
 numpy/_core/strings.py                        | 39 ++-----------------
 numpy/_core/tests/test_strings.py             | 36 +++++++++++++++++
 8 files changed, 127 insertions(+), 36 deletions(-)

diff --git a/numpy/_core/code_generators/generate_umath.py b/numpy/_core/code_generators/generate_umath.py
index 50c0ea673d37..89977a19206f 100644
--- a/numpy/_core/code_generators/generate_umath.py
+++ b/numpy/_core/code_generators/generate_umath.py
@@ -1170,6 +1170,11 @@ def english_upper(s):
           docstrings.get('numpy._core.umath.isspace'),
           None,
           ),
+'isalnum':
+    Ufunc(1, 1, False_,
+          docstrings.get('numpy._core.umath.isalnum'),
+          None,
+          ),
 'islower':
     Ufunc(1, 1, False_,
           docstrings.get('numpy._core.umath.islower'),
diff --git a/numpy/_core/code_generators/ufunc_docstrings.py b/numpy/_core/code_generators/ufunc_docstrings.py
index a081856f44b6..eedd18b2939b 100644
--- a/numpy/_core/code_generators/ufunc_docstrings.py
+++ b/numpy/_core/code_generators/ufunc_docstrings.py
@@ -4441,6 +4441,34 @@ def add_newdoc(place, name, doc):
 
     """)
 
+add_newdoc('numpy._core.umath', 'isalnum',
+    """
+    Returns true for each element if all characters in the string are
+    alphanumeric and there is at least one character, false otherwise.
+
+    Parameters
+    ----------
+    x : array_like, with `np.bytes_` or `np.str_` dtype
+    $PARAMS
+
+    Returns
+    -------
+    out : ndarray
+        Output array of bool
+        $OUT_SCALAR_1
+
+    See Also
+    --------
+    str.isalnum
+
+    Examples
+    --------
+    >>> a = np.array(['a', '1', 'a1', '(', ''])
+    >>> np.strings.isalnum(a)
+    array([ True,  True,  True, False, False])
+    
+    """)
+
 add_newdoc('numpy._core.umath', 'islower',
     """
     Returns true for each element if all cased characters in the
diff --git a/numpy/_core/src/common/numpyos.c b/numpy/_core/src/common/numpyos.c
index 057595593e51..4b589966591e 100644
--- a/numpy/_core/src/common/numpyos.c
+++ b/numpy/_core/src/common/numpyos.c
@@ -381,7 +381,7 @@ NumPyOS_ascii_isdigit(char c)
  *
  * Same as isalnum under C locale
  */
-static int
+NPY_NO_EXPORT int
 NumPyOS_ascii_isalnum(char c)
 {
     return NumPyOS_ascii_isdigit(c) || NumPyOS_ascii_isalpha(c);
diff --git a/numpy/_core/src/common/numpyos.h b/numpy/_core/src/common/numpyos.h
index 96c167190222..2b2b88bdc954 100644
--- a/numpy/_core/src/common/numpyos.h
+++ b/numpy/_core/src/common/numpyos.h
@@ -41,6 +41,9 @@ NumPyOS_ascii_isalpha(char c);
 NPY_NO_EXPORT int
 NumPyOS_ascii_isdigit(char c);
 
+NPY_NO_EXPORT int
+NumPyOS_ascii_isalnum(char c);
+
 /* Convert a string to an int in an arbitrary base */
 NPY_NO_EXPORT npy_longlong
 NumPyOS_strtoll(const char *str, char **endptr, int base);
diff --git a/numpy/_core/src/umath/string_buffer.h b/numpy/_core/src/umath/string_buffer.h
index b9f460f3d0b8..1c143e644e7c 100644
--- a/numpy/_core/src/umath/string_buffer.h
+++ b/numpy/_core/src/umath/string_buffer.h
@@ -26,6 +26,7 @@ enum class IMPLEMENTED_UNARY_FUNCTIONS {
     ISDECIMAL,
     ISDIGIT,
     ISSPACE,
+    ISALNUM,
     ISLOWER,
     ISUPPER,
     ISTITLE,
@@ -139,6 +140,31 @@ codepoint_isspace<ENCODING::UTF8>(npy_ucs4 code)
     return Py_UNICODE_ISSPACE(code);
 }
 
+template<ENCODING enc>
+inline bool
+codepoint_isalnum(npy_ucs4 code);
+
+template<>
+inline bool
+codepoint_isalnum<ENCODING::ASCII>(npy_ucs4 code)
+{
+    return NumPyOS_ascii_isalnum(code);
+}
+
+template<>
+inline bool
+codepoint_isalnum<ENCODING::UTF32>(npy_ucs4 code)
+{
+    return Py_UNICODE_ISALNUM(code);
+}
+
+template<>
+inline bool
+codepoint_isalnum<ENCODING::UTF8>(npy_ucs4 code)
+{
+    return Py_UNICODE_ISALNUM(code);
+}
+
 template<ENCODING enc>
 inline bool
 codepoint_islower(npy_ucs4 code);
@@ -467,6 +493,12 @@ struct Buffer {
         return unary_loop<IMPLEMENTED_UNARY_FUNCTIONS::ISDIGIT>();
     }
 
+    inline bool
+    isalnum()
+    {
+        return unary_loop<IMPLEMENTED_UNARY_FUNCTIONS::ISALNUM>();
+    }
+
     inline bool
     islower()
     {
@@ -620,6 +652,8 @@ struct call_buffer_member_function {
                 return codepoint_isdigit<enc>(*buf);
             case IMPLEMENTED_UNARY_FUNCTIONS::ISSPACE:
                 return codepoint_isspace<enc>(*buf);
+            case IMPLEMENTED_UNARY_FUNCTIONS::ISALNUM:
+                return codepoint_isalnum<enc>(*buf);
             case IMPLEMENTED_UNARY_FUNCTIONS::ISNUMERIC:
                 return codepoint_isnumeric(*buf);
             case IMPLEMENTED_UNARY_FUNCTIONS::ISDECIMAL:
diff --git a/numpy/_core/src/umath/string_ufuncs.cpp b/numpy/_core/src/umath/string_ufuncs.cpp
index 219d22e253a9..1d0dca84a8d3 100644
--- a/numpy/_core/src/umath/string_ufuncs.cpp
+++ b/numpy/_core/src/umath/string_ufuncs.cpp
@@ -119,6 +119,9 @@ struct call_buffer_unary_function {
             case IMPLEMENTED_UNARY_FUNCTIONS::ISSPACE:
                 *(T *)out = buf.isspace();
                 break;
+            case IMPLEMENTED_UNARY_FUNCTIONS::ISALNUM:
+                *(T *)out = buf.isalnum();
+                break;
             case IMPLEMENTED_UNARY_FUNCTIONS::ISLOWER:
                 *(T *)out = buf.islower();
                 break;
@@ -1134,6 +1137,19 @@ init_string_ufuncs(PyObject *umath)
         return -1;
     }
 
+    if (init_ufunc<ENCODING::ASCII>(
+            umath, "isalnum", "templated_string_isalnum", 1, 1, dtypes,
+            string_unary_loop<ENCODING::ASCII, IMPLEMENTED_UNARY_FUNCTIONS::ISALNUM, npy_bool>,
+            NULL) < 0) {
+        return -1;
+    }
+    if (init_ufunc<ENCODING::UTF32>(
+            umath, "isalnum", "templated_string_isalnum", 1, 1, dtypes,
+            string_unary_loop<ENCODING::UTF32, IMPLEMENTED_UNARY_FUNCTIONS::ISALNUM, npy_bool>,
+            NULL) < 0) {
+        return -1;
+    }
+
     if (init_ufunc<ENCODING::ASCII>(
             umath, "islower", "templated_string_islower", 1, 1, dtypes,
             string_unary_loop<ENCODING::ASCII, IMPLEMENTED_UNARY_FUNCTIONS::ISLOWER, npy_bool>,
diff --git a/numpy/_core/strings.py b/numpy/_core/strings.py
index 6aa3da77f90a..99d2ef5e2b3e 100644
--- a/numpy/_core/strings.py
+++ b/numpy/_core/strings.py
@@ -13,6 +13,7 @@
     isalpha,
     isdigit,
     isspace,
+    isalnum,
     islower,
     isupper,
     istitle,
@@ -37,12 +38,12 @@
 __all__ = [
     # UFuncs
     "equal", "not_equal", "less", "less_equal", "greater", "greater_equal",
-    "add", "isalpha", "isdigit", "isspace", "islower", "isupper", "istitle",
-    "isdecimal", "isnumeric", "str_len", "find", "rfind", "count",
+    "add", "isalpha", "isdigit", "isspace", "isalnum", "islower", "isupper",
+    "istitle", "isdecimal", "isnumeric", "str_len", "find", "rfind", "count",
     "startswith", "endswith", "lstrip", "rstrip", "strip", "replace",
 
     # _vec_string - Will gradually become ufuncs as well
-    "isalnum", "multiply", "mod", "index",
+    "multiply", "mod", "index",
     "rindex", "decode", "encode", "expandtabs", "center",
     "ljust", "rjust", "zfill", "upper", "lower", "swapcase", "capitalize",
     "title", "join", "split", "rsplit", "splitlines",
@@ -96,38 +97,6 @@ def _clean_args(*args):
     return newargs
 
 
-def isalnum(a):
-    """
-    Returns true for each element if all characters in the string are
-    alphanumeric and there is at least one character, false otherwise.
-
-    Calls :meth:`str.isalnum` element-wise.
-
-    For 8-bit strings, this method is locale-dependent.
-
-    Parameters
-    ----------
-    a : array_like, with `np.bytes_` or `np.str_` dtype
-
-    Returns
-    -------
-    out : ndarray
-        Output array of str or unicode, depending on input type
-
-    See Also
-    --------
-    str.isalnum
-
-    Examples
-    --------
-    >>> a = np.array(['a', '1', 'a1', '(', ''])
-    >>> np.strings.isalnum(a)
-    array([ True,  True,  True, False, False])
-    
-    """
-    return _vec_string(a, np.bool, 'isalnum')
-
-
 def multiply(a, i):
     """
     Return (a * i), that is string multiple concatenation,
diff --git a/numpy/_core/tests/test_strings.py b/numpy/_core/tests/test_strings.py
index efe24658025d..091fa7388eb9 100644
--- a/numpy/_core/tests/test_strings.py
+++ b/numpy/_core/tests/test_strings.py
@@ -167,6 +167,24 @@ def test_isspace(self, in_, out, dt):
         in_ = np.array(in_, dtype=dt)
         assert_array_equal(np.strings.isspace(in_), out)
 
+    @pytest.mark.parametrize("in_,out", [
+        ('', False),
+        ('a', True),
+        ('A', True),
+        ('\n', False),
+        ('123abc456', True),
+        ('a1b3c', True),
+        ('aBc000 ', False),
+        ('abc\n', False),
+    ])
+    def test_isalnum(self, in_, out, dt):
+        # TODO: Remove this
+        if dt == "T":
+            pytest.xfail(
+                "StringDType support to be added in a follow-up commit")
+        in_ = np.array(in_, dtype=dt)
+        assert_array_equal(np.strings.isalnum(in_), out)
+
     @pytest.mark.parametrize("in_,out", [
         ('', False),
         ('a', True),
@@ -672,6 +690,24 @@ def test_replace_unicode(self, dt):
         assert_array_equal(np.strings.replace(buf,  "<", "&lt;", MAX),
                            "...\u043c......&lt;")
 
+    @pytest.mark.parametrize("dt", ["U", "T"])
+    @pytest.mark.parametrize("in_", [
+        '\U00010401',
+        '\U00010427',
+        '\U00010429',
+        '\U0001044E',
+        '\U0001D7F6',
+        '\U00011066',
+        '\U000104A0',
+        '\U0001F107',
+    ])
+    def test_isalnum_unicode(self, in_, dt):
+        # TODO: Remove this
+        if dt == "T":
+            pytest.xfail(
+                "StringDType support to be added in a follow-up commit")
+        assert_array_equal(np.strings.isalnum(in_), True)
+
     @pytest.mark.parametrize("dt", ["U", "T"])
     @pytest.mark.parametrize("in_,out", [
         ('\u1FFc', False),

From b9202ca464e0b7cca14f800ad0cdc384c7bc1310 Mon Sep 17 00:00:00 2001
From: Lysandros Nikolaou <lisandrosnik@gmail.com>
Date: Wed, 14 Feb 2024 12:01:20 +0100
Subject: [PATCH 4/9] Refactor tests into multiple parametrized classes

---
 numpy/_core/tests/test_strings.py | 207 +++++++++++++++---------------
 1 file changed, 100 insertions(+), 107 deletions(-)

diff --git a/numpy/_core/tests/test_strings.py b/numpy/_core/tests/test_strings.py
index 091fa7388eb9..312495e414af 100644
--- a/numpy/_core/tests/test_strings.py
+++ b/numpy/_core/tests/test_strings.py
@@ -167,79 +167,6 @@ def test_isspace(self, in_, out, dt):
         in_ = np.array(in_, dtype=dt)
         assert_array_equal(np.strings.isspace(in_), out)
 
-    @pytest.mark.parametrize("in_,out", [
-        ('', False),
-        ('a', True),
-        ('A', True),
-        ('\n', False),
-        ('123abc456', True),
-        ('a1b3c', True),
-        ('aBc000 ', False),
-        ('abc\n', False),
-    ])
-    def test_isalnum(self, in_, out, dt):
-        # TODO: Remove this
-        if dt == "T":
-            pytest.xfail(
-                "StringDType support to be added in a follow-up commit")
-        in_ = np.array(in_, dtype=dt)
-        assert_array_equal(np.strings.isalnum(in_), out)
-
-    @pytest.mark.parametrize("in_,out", [
-        ('', False),
-        ('a', True),
-        ('A', False),
-        ('\n', False),
-        ('abc', True),
-        ('aBc', False),
-        ('abc\n', True),
-    ])
-    def test_islower(self, in_, out, dt):
-        # TODO: Remove this
-        if dt == "T":
-            pytest.xfail(
-                "StringDType support to be added in a follow-up commit")
-        in_ = np.array(in_, dtype=dt)
-        assert_array_equal(np.strings.islower(in_), out)
-
-    @pytest.mark.parametrize("in_,out", [
-        ('', False),
-        ('a', False),
-        ('A', True),
-        ('\n', False),
-        ('ABC', True),
-        ('AbC', False),
-        ('ABC\n', True),
-    ])
-    def test_isupper(self, in_, out, dt):
-        # TODO: Remove this
-        if dt == "T":
-            pytest.xfail(
-                "StringDType support to be added in a follow-up commit")
-        in_ = np.array(in_, dtype=dt)
-        assert_array_equal(np.strings.isupper(in_), out)
-
-    @pytest.mark.parametrize("in_,out", [
-        ('', False),
-        ('a', False),
-        ('A', True),
-        ('\n', False),
-        ('A Titlecased Line', True),
-        ('A\nTitlecased Line', True),
-        ('A Titlecased, Line', True),
-        ('Not a capitalized String', False),
-        ('Not\ta Titlecase String', False),
-        ('Not--a Titlecase String', False),
-        ('NOT', False),
-    ])
-    def test_istitle(self, in_, out, dt):
-        # TODO: Remove this
-        if dt == "T":
-            pytest.xfail(
-                "StringDType support to be added in a follow-up commit")
-        in_ = np.array(in_, dtype=dt)
-        assert_array_equal(np.strings.istitle(in_), out)
-
     @pytest.mark.parametrize("in_,out", [
         ("", 0),
         ("abc", 3),
@@ -641,8 +568,78 @@ def test_replace(self, buf, old, new, count, res, dt):
         assert_array_equal(np.strings.replace(buf, old, new, count), res)
 
 
-class TestUnicodeOnlyMethods:
-    @pytest.mark.parametrize("dt", ["U", "T"])
+@pytest.mark.parametrize("dt", [
+    "S",
+    "U",
+    pytest.param("T", marks=pytest.mark.xfail(
+        reason="StringDType support not implemented yet", strict=True)),
+])
+class TestMethosWithoutStringDTypeSupport:
+    """
+    Tests shoud be moved to `TestMethods` once StringDType support is
+    implemeted
+    """
+
+    @pytest.mark.parametrize("in_,out", [
+        ('', False),
+        ('a', True),
+        ('A', True),
+        ('\n', False),
+        ('123abc456', True),
+        ('a1b3c', True),
+        ('aBc000 ', False),
+        ('abc\n', False),
+    ])
+    def test_isalnum(self, in_, out, dt):
+        in_ = np.array(in_, dtype=dt)
+        assert_array_equal(np.strings.isalnum(in_), out)
+
+    @pytest.mark.parametrize("in_,out", [
+        ('', False),
+        ('a', True),
+        ('A', False),
+        ('\n', False),
+        ('abc', True),
+        ('aBc', False),
+        ('abc\n', True),
+    ])
+    def test_islower(self, in_, out, dt):
+        in_ = np.array(in_, dtype=dt)
+        assert_array_equal(np.strings.islower(in_), out)
+
+    @pytest.mark.parametrize("in_,out", [
+        ('', False),
+        ('a', False),
+        ('A', True),
+        ('\n', False),
+        ('ABC', True),
+        ('AbC', False),
+        ('ABC\n', True),
+    ])
+    def test_isupper(self, in_, out, dt):
+        in_ = np.array(in_, dtype=dt)
+        assert_array_equal(np.strings.isupper(in_), out)
+
+    @pytest.mark.parametrize("in_,out", [
+        ('', False),
+        ('a', False),
+        ('A', True),
+        ('\n', False),
+        ('A Titlecased Line', True),
+        ('A\nTitlecased Line', True),
+        ('A Titlecased, Line', True),
+        ('Not a capitalized String', False),
+        ('Not\ta Titlecase String', False),
+        ('Not--a Titlecase String', False),
+        ('NOT', False),
+    ])
+    def test_istitle(self, in_, out, dt):
+        in_ = np.array(in_, dtype=dt)
+        assert_array_equal(np.strings.istitle(in_), out)
+
+
+@pytest.mark.parametrize("dt", ["U", "T"])
+class TestMethodsWithUnicode:
     @pytest.mark.parametrize("in_,out", [
         ("", False),
         ("a", False),
@@ -658,12 +655,6 @@ def test_isdecimal_unicode(self, in_, out, dt):
         buf = np.array(in_, dtype=dt)
         assert_array_equal(np.strings.isdecimal(buf), out)
 
-    def test_isdecimal_bytes(self):
-        in_ = np.array(b"1")
-        with assert_raises(TypeError):
-            np.strings.isdecimal(in_)
-
-    @pytest.mark.parametrize("dt", ["U", "T"])
     @pytest.mark.parametrize("in_,out", [
         ("", False),
         ("a", False),
@@ -679,18 +670,23 @@ def test_isnumeric_unicode(self, in_, out, dt):
         buf = np.array(in_, dtype=dt)
         assert_array_equal(np.strings.isnumeric(buf), out)
 
-    def test_isnumeric_bytes(self):
-        in_ = np.array(b"1")
-        with assert_raises(TypeError):
-            np.strings.isnumeric(in_)
-
-    @pytest.mark.parametrize("dt", ["U", "T"])
     def test_replace_unicode(self, dt):
         buf = np.array("...\u043c......<", dtype=dt)
         assert_array_equal(np.strings.replace(buf,  "<", "&lt;", MAX),
                            "...\u043c......&lt;")
 
-    @pytest.mark.parametrize("dt", ["U", "T"])
+
+@pytest.mark.parametrize("dt", [
+    "U",
+    pytest.param("T", marks=pytest.mark.xfail(
+        reason="StringDType support not implemented yet", strict=True)),
+])
+class TestMethodsWithoutStringDTypeSupportWithUnicode:
+    """
+    Tests shoud be moved to `TestMethods` once StringDType support is
+    implemeted
+    """
+
     @pytest.mark.parametrize("in_", [
         '\U00010401',
         '\U00010427',
@@ -702,13 +698,9 @@ def test_replace_unicode(self, dt):
         '\U0001F107',
     ])
     def test_isalnum_unicode(self, in_, dt):
-        # TODO: Remove this
-        if dt == "T":
-            pytest.xfail(
-                "StringDType support to be added in a follow-up commit")
+        in_ = np.array(in_, dtype=dt)
         assert_array_equal(np.strings.isalnum(in_), True)
 
-    @pytest.mark.parametrize("dt", ["U", "T"])
     @pytest.mark.parametrize("in_,out", [
         ('\u1FFc', False),
         ('\u2167', False),
@@ -721,13 +713,9 @@ def test_isalnum_unicode(self, in_, dt):
         ('\U0001044E', True),
     ])
     def test_islower_unicode(self, in_, out, dt):
-        # TODO: Remove this
-        if dt == "T":
-            pytest.xfail(
-                "StringDType support to be added in a follow-up commit")
+        in_ = np.array(in_, dtype=dt)
         assert_array_equal(np.strings.islower(in_), out)
 
-    @pytest.mark.parametrize("dt", ["U", "T"])
     @pytest.mark.parametrize("in_,out", [
         ('\u1FFc', False),
         ('\u2167', True),
@@ -740,13 +728,9 @@ def test_islower_unicode(self, in_, out, dt):
         ('\U0001044E', False),
     ])
     def test_isupper_unicode(self, in_, out, dt):
-        # TODO: Remove this
-        if dt == "T":
-            pytest.xfail(
-                "StringDType support to be added in a follow-up commit")
+        in_ = np.array(in_, dtype=dt)
         assert_array_equal(np.strings.isupper(in_), out)
 
-    @pytest.mark.parametrize("dt", ["U", "T"])
     @pytest.mark.parametrize("in_,out", [
         ('\u1FFc', True),
         ('Greek \u1FFcitlecases ...', True),
@@ -758,13 +742,22 @@ def test_isupper_unicode(self, in_, out, dt):
         ('\U0001F46F', False),
     ])
     def test_istitle_unicode(self, in_, out, dt):
-        # TODO: Remove this
-        if dt == "T":
-            pytest.xfail(
-                "StringDType support to be added in a follow-up commit")
+        in_ = np.array(in_, dtype=dt)
         assert_array_equal(np.strings.istitle(in_), out)
 
 
+class TestUnicodeOnlyMethodsRaiseWithBytes:
+    def test_isdecimal_raises(self):
+        in_ = np.array(b"1")
+        with assert_raises(TypeError):
+            np.strings.isdecimal(in_)
+
+    def test_isnumeric_bytes(self):
+        in_ = np.array(b"1")
+        with assert_raises(TypeError):
+            np.strings.isnumeric(in_)
+
+
 def check_itemsize(n_elem, dt):
     if dt == "T":
         return np.dtype(dt).itemsize

From f324332a29c1d7e927592f4db53a5ca75e6382fb Mon Sep 17 00:00:00 2001
From: Nathan Goldbaum <nathan.goldbaum@gmail.com>
Date: Wed, 14 Feb 2024 12:49:55 -0700
Subject: [PATCH 5/9] ENH: implement isalnum, istitle, isupper, islower for
 stringdtype

---
 numpy/_core/src/umath/stringdtype_ufuncs.cpp | 92 ++++++++++++++++++++
 numpy/_core/tests/test_stringdtype.py        |  8 +-
 numpy/_core/tests/test_strings.py            | 25 ------
 3 files changed, 96 insertions(+), 29 deletions(-)

diff --git a/numpy/_core/src/umath/stringdtype_ufuncs.cpp b/numpy/_core/src/umath/stringdtype_ufuncs.cpp
index 647aa4d71ef3..1aa687adc000 100644
--- a/numpy/_core/src/umath/stringdtype_ufuncs.cpp
+++ b/numpy/_core/src/umath/stringdtype_ufuncs.cpp
@@ -1034,6 +1034,18 @@ struct call_buffer_function {
             case IMPLEMENTED_UNARY_FUNCTIONS::STR_LEN:
                 *(T *)out = buf.num_codepoints();
                 break;
+            case IMPLEMENTED_UNARY_FUNCTIONS::ISALNUM:
+                *(T *)out = buf.isalnum();
+                break;
+            case IMPLEMENTED_UNARY_FUNCTIONS::ISTITLE:
+                *(T *)out = buf.istitle();
+                break;
+            case IMPLEMENTED_UNARY_FUNCTIONS::ISUPPER:
+                *(T *)out = buf.isupper();
+                break;
+            case IMPLEMENTED_UNARY_FUNCTIONS::ISLOWER:
+                *(T *)out = buf.islower();
+                break;
         }
     }
 };
@@ -1172,6 +1184,58 @@ string_isspace_strided_loop(PyArrayMethod_Context *context, char *const data[],
 }
 
 
+static const char isalnum_name[] = "isalnum";
+
+static int
+string_isalnum_strided_loop(PyArrayMethod_Context *context, char *const data[],
+                            npy_intp const dimensions[],
+                            npy_intp const strides[],
+                            NpyAuxData *auxdata)
+{
+    return string_bool_output_unary_strided_loop<IMPLEMENTED_UNARY_FUNCTIONS::ISALNUM, isalnum_name>(
+            context, data, dimensions, strides, auxdata);
+}
+
+
+static const char istitle_name[] = "istitle";
+
+static int
+string_istitle_strided_loop(PyArrayMethod_Context *context, char *const data[],
+                            npy_intp const dimensions[],
+                            npy_intp const strides[],
+                            NpyAuxData *auxdata)
+{
+    return string_bool_output_unary_strided_loop<IMPLEMENTED_UNARY_FUNCTIONS::ISTITLE, istitle_name>(
+            context, data, dimensions, strides, auxdata);
+}
+
+
+static const char islower_name[] = "islower";
+
+static int
+string_islower_strided_loop(PyArrayMethod_Context *context, char *const data[],
+                            npy_intp const dimensions[],
+                            npy_intp const strides[],
+                            NpyAuxData *auxdata)
+{
+    return string_bool_output_unary_strided_loop<IMPLEMENTED_UNARY_FUNCTIONS::ISLOWER, islower_name>(
+            context, data, dimensions, strides, auxdata);
+}
+
+
+static const char isupper_name[] = "isupper";
+
+static int
+string_isupper_strided_loop(PyArrayMethod_Context *context, char *const data[],
+                            npy_intp const dimensions[],
+                            npy_intp const strides[],
+                            NpyAuxData *auxdata)
+{
+    return string_bool_output_unary_strided_loop<IMPLEMENTED_UNARY_FUNCTIONS::ISUPPER, isupper_name>(
+            context, data, dimensions, strides, auxdata);
+}
+
+
 static int
 string_strlen_strided_loop(PyArrayMethod_Context *context, char *const data[],
                            npy_intp const dimensions[],
@@ -2821,6 +2885,34 @@ init_stringdtype_ufuncs(PyObject *umath)
         return -1;
     }
 
+    if (init_ufunc(umath, "isalnum", bool_output_dtypes,
+                   &string_bool_output_resolve_descriptors,
+                   &string_isalnum_strided_loop, 1, 1, NPY_NO_CASTING,
+                   (NPY_ARRAYMETHOD_FLAGS) 0) < 0) {
+        return -1;
+    }
+
+    if (init_ufunc(umath, "istitle", bool_output_dtypes,
+                   &string_bool_output_resolve_descriptors,
+                   &string_istitle_strided_loop, 1, 1, NPY_NO_CASTING,
+                   (NPY_ARRAYMETHOD_FLAGS) 0) < 0) {
+        return -1;
+    }
+
+    if (init_ufunc(umath, "isupper", bool_output_dtypes,
+                   &string_bool_output_resolve_descriptors,
+                   &string_isupper_strided_loop, 1, 1, NPY_NO_CASTING,
+                   (NPY_ARRAYMETHOD_FLAGS) 0) < 0) {
+        return -1;
+    }
+
+    if (init_ufunc(umath, "islower", bool_output_dtypes,
+                   &string_bool_output_resolve_descriptors,
+                   &string_islower_strided_loop, 1, 1, NPY_NO_CASTING,
+                   (NPY_ARRAYMETHOD_FLAGS) 0) < 0) {
+        return -1;
+    }
+
     PyArray_DTypeMeta *intp_output_dtypes[] = {
         &PyArray_StringDType,
         &PyArray_IntpDType
diff --git a/numpy/_core/tests/test_stringdtype.py b/numpy/_core/tests/test_stringdtype.py
index 9e562de81cfc..b14c41586113 100644
--- a/numpy/_core/tests/test_stringdtype.py
+++ b/numpy/_core/tests/test_stringdtype.py
@@ -952,15 +952,15 @@ def unicode_array():
     "upper",
     "isnumeric",
     "isdecimal",
+    "isalnum",
+    "islower",
+    "istitle",
+    "isupper",
 ]
 
 UNIMPLEMENTED_VEC_STRING_FUNCTIONS = [
     "capitalize",
     "expandtabs",
-    "isalnum",
-    "islower",
-    "istitle",
-    "isupper",
     "lower",
     "splitlines",
     "swapcase",
diff --git a/numpy/_core/tests/test_strings.py b/numpy/_core/tests/test_strings.py
index 312495e414af..57940a118a94 100644
--- a/numpy/_core/tests/test_strings.py
+++ b/numpy/_core/tests/test_strings.py
@@ -567,19 +567,6 @@ def test_replace(self, buf, old, new, count, res, dt):
         res = np.array(res, dtype=dt)
         assert_array_equal(np.strings.replace(buf, old, new, count), res)
 
-
-@pytest.mark.parametrize("dt", [
-    "S",
-    "U",
-    pytest.param("T", marks=pytest.mark.xfail(
-        reason="StringDType support not implemented yet", strict=True)),
-])
-class TestMethosWithoutStringDTypeSupport:
-    """
-    Tests shoud be moved to `TestMethods` once StringDType support is
-    implemeted
-    """
-
     @pytest.mark.parametrize("in_,out", [
         ('', False),
         ('a', True),
@@ -675,18 +662,6 @@ def test_replace_unicode(self, dt):
         assert_array_equal(np.strings.replace(buf,  "<", "&lt;", MAX),
                            "...\u043c......&lt;")
 
-
-@pytest.mark.parametrize("dt", [
-    "U",
-    pytest.param("T", marks=pytest.mark.xfail(
-        reason="StringDType support not implemented yet", strict=True)),
-])
-class TestMethodsWithoutStringDTypeSupportWithUnicode:
-    """
-    Tests shoud be moved to `TestMethods` once StringDType support is
-    implemeted
-    """
-
     @pytest.mark.parametrize("in_", [
         '\U00010401',
         '\U00010427',

From a9711bb7f713a2237a2c6a6640997e71b81ff5b7 Mon Sep 17 00:00:00 2001
From: Lysandros Nikolaou <lisandrosnik@gmail.com>
Date: Fri, 16 Feb 2024 11:15:24 +0100
Subject: [PATCH 6/9] Separate str_len / bool methods; use static_data approach

---
 numpy/_core/src/umath/string_ufuncs.cpp | 224 ++++++++----------------
 1 file changed, 75 insertions(+), 149 deletions(-)

diff --git a/numpy/_core/src/umath/string_ufuncs.cpp b/numpy/_core/src/umath/string_ufuncs.cpp
index 2ecdaa527985..588deaf3001e 100644
--- a/numpy/_core/src/umath/string_ufuncs.cpp
+++ b/numpy/_core/src/umath/string_ufuncs.cpp
@@ -99,52 +99,41 @@ string_comparison_loop(PyArrayMethod_Context *context,
 }
 
 
-template <ENCODING enc, IMPLEMENTED_UNARY_FUNCTIONS f, typename T>
-struct call_buffer_unary_function {
-    void operator()(const char *buffer, size_t size, char *out) {
-        Buffer<enc> buf((char *)buffer, size);
-        switch (f) {
-            case IMPLEMENTED_UNARY_FUNCTIONS::ISALPHA:
-                *(T *)out = buf.isalpha();
-                break;
-            case IMPLEMENTED_UNARY_FUNCTIONS::ISDECIMAL:
-                *(T *)out = buf.isdecimal();
-                break;
-            case IMPLEMENTED_UNARY_FUNCTIONS::ISDIGIT:
-                *(T *)out = buf.isdigit();
-                break;
-            case IMPLEMENTED_UNARY_FUNCTIONS::ISNUMERIC:
-                *(T *)out = buf.isnumeric();
-                break;
-            case IMPLEMENTED_UNARY_FUNCTIONS::ISSPACE:
-                *(T *)out = buf.isspace();
-                break;
-            case IMPLEMENTED_UNARY_FUNCTIONS::ISALNUM:
-                *(T *)out = buf.isalnum();
-                break;
-            case IMPLEMENTED_UNARY_FUNCTIONS::ISLOWER:
-                *(T *)out = buf.islower();
-                break;
-            case IMPLEMENTED_UNARY_FUNCTIONS::ISUPPER:
-                *(T *)out = buf.isupper();
-                break;
-            case IMPLEMENTED_UNARY_FUNCTIONS::ISTITLE:
-                *(T *)out = buf.istitle();
-                break;
-            case IMPLEMENTED_UNARY_FUNCTIONS::STR_LEN:
-                *(T *)out = buf.num_codepoints();
-                break;
-        }
+template <ENCODING enc>
+static int
+string_str_len_loop(PyArrayMethod_Context *context,
+                    char *const data[], npy_intp const dimensions[],
+                    npy_intp const strides[], NpyAuxData *NPY_UNUSED(auxdata))
+{
+    int elsize = context->descriptors[0]->elsize;
+
+    char *in = data[0];
+    char *out = data[1];
+
+    npy_intp N = dimensions[0];
+
+    while (N--) {
+        Buffer<enc> buf(in, elsize);
+        *(npy_intp *)out = buf.num_codepoints();
+
+        in += strides[0];
+        out += strides[1];
     }
-};
+
+    return 0;
+}
 
 
-template <ENCODING enc, IMPLEMENTED_UNARY_FUNCTIONS f, typename return_value>
+template <ENCODING enc>
+using buffer_method = bool (Buffer<enc>::*)();
+
+template <ENCODING enc>
 static int
 string_unary_loop(PyArrayMethod_Context *context,
                   char *const data[], npy_intp const dimensions[],
                   npy_intp const strides[], NpyAuxData *NPY_UNUSED(auxdata))
 {
+    buffer_method<enc> is_it = *(buffer_method<enc> *)(context->method->static_data);
     int elsize = context->descriptors[0]->elsize;
 
     char *in = data[0];
@@ -153,8 +142,8 @@ string_unary_loop(PyArrayMethod_Context *context,
     npy_intp N = dimensions[0];
 
     while (N--) {
-        call_buffer_unary_function<enc, f, return_value> cbuf;
-        cbuf(in, (size_t) elsize, out);
+        Buffer<enc> buf(in, elsize);
+        *(npy_bool *)out = (buf.*is_it)();
 
         in += strides[0];
         out += strides[1];
@@ -939,30 +928,61 @@ init_string_ufuncs(PyObject *umath)
     dtypes[1] = NPY_DEFAULT_INT;
     if (init_ufunc(
             umath, "str_len", 1, 1, dtypes, ENCODING::ASCII,
-            string_unary_loop<ENCODING::ASCII, IMPLEMENTED_UNARY_FUNCTIONS::STR_LEN, npy_intp>,
-            NULL, NULL) < 0) {
+            string_str_len_loop<ENCODING::ASCII>, NULL, NULL) < 0) {
         return -1;
     }
     if (init_ufunc(
             umath, "str_len", 1, 1, dtypes, ENCODING::UTF32,
-            string_unary_loop<ENCODING::UTF32, IMPLEMENTED_UNARY_FUNCTIONS::STR_LEN, npy_intp>,
-            NULL, NULL) < 0) {
+            string_str_len_loop<ENCODING::UTF32>, NULL, NULL) < 0) {
         return -1;
     }
 
     dtypes[0] = NPY_OBJECT;
     dtypes[1] = NPY_BOOL;
-    if (init_ufunc(
-            umath, "isalpha", 1, 1, dtypes, ENCODING::ASCII,
-            string_unary_loop<ENCODING::ASCII, IMPLEMENTED_UNARY_FUNCTIONS::ISALPHA, npy_bool>,
-            NULL, NULL) < 0) {
-        return -1;
-    }
-    if (init_ufunc(
-            umath, "isalpha", 1, 1, dtypes, ENCODING::UTF32,
-            string_unary_loop<ENCODING::UTF32, IMPLEMENTED_UNARY_FUNCTIONS::ISALPHA, npy_bool>,
-            NULL, NULL) < 0) {
-        return -1;
+
+    const char *unary_buffer_method_names[] = {
+        "isalpha", "isalnum", "isdigit", "isspace", "islower",
+        "isupper", "istitle", "isdecimal", "isnumeric",
+    };
+
+    static buffer_method<ENCODING::ASCII> unary_buffer_ascii_methods[] = {
+        &Buffer<ENCODING::ASCII>::isalpha,
+        &Buffer<ENCODING::ASCII>::isalnum,
+        &Buffer<ENCODING::ASCII>::isdigit,
+        &Buffer<ENCODING::ASCII>::isspace,
+        &Buffer<ENCODING::ASCII>::islower,
+        &Buffer<ENCODING::ASCII>::isupper,
+        &Buffer<ENCODING::ASCII>::istitle,
+    };
+
+    static buffer_method<ENCODING::UTF32> unary_buffer_utf32_methods[] = {
+        &Buffer<ENCODING::UTF32>::isalpha,
+        &Buffer<ENCODING::UTF32>::isalnum,
+        &Buffer<ENCODING::UTF32>::isdigit,
+        &Buffer<ENCODING::UTF32>::isspace,
+        &Buffer<ENCODING::UTF32>::islower,
+        &Buffer<ENCODING::UTF32>::isupper,
+        &Buffer<ENCODING::UTF32>::istitle,
+        &Buffer<ENCODING::UTF32>::isdecimal,
+        &Buffer<ENCODING::UTF32>::isnumeric,
+    };
+
+    for (int i = 0; i < 9; i++) {
+        if (i < 7) { // isdecimal & isnumeric do not support ASCII
+            if (init_ufunc(
+                    umath, unary_buffer_method_names[i], 1, 1, dtypes, ENCODING::ASCII,
+                    string_unary_loop<ENCODING::ASCII>, NULL,
+                    &unary_buffer_ascii_methods[i]) < 0) {
+                return -1;
+            }
+        }
+
+        if (init_ufunc(
+                umath, unary_buffer_method_names[i], 1, 1, dtypes, ENCODING::UTF32,
+                string_unary_loop<ENCODING::UTF32>, NULL,
+                &unary_buffer_utf32_methods[i]) < 0) {
+            return -1;
+        }
     }
 
     dtypes[0] = dtypes[1] = NPY_OBJECT;
@@ -1060,100 +1080,6 @@ init_string_ufuncs(PyObject *umath)
         return -1;
     }
 
-    dtypes[0] = NPY_OBJECT;
-    dtypes[1] = NPY_BOOL;
-    if (init_ufunc(
-            umath, "isdigit", "templated_string_isdigit", 1, 1, dtypes, ENCODING::ASCII
-            string_unary_loop<ENCODING::ASCII, IMPLEMENTED_UNARY_FUNCTIONS::ISDIGIT, npy_bool>,
-            NULL) < 0) {
-        return -1;
-    }
-    if (init_ufunc(
-            umath, "isdigit", "templated_string_isdigit", 1, 1, dtypes, ENCODING::UTF32,
-            string_unary_loop<ENCODING::UTF32, IMPLEMENTED_UNARY_FUNCTIONS::ISDIGIT, npy_bool>,
-            NULL) < 0) {
-        return -1;
-    }
-
-    if (init_ufunc(
-            umath, "isspace", "templated_string_isspace", 1, 1, dtypes, ENCODING::ASCII,
-            string_unary_loop<ENCODING::ASCII, IMPLEMENTED_UNARY_FUNCTIONS::ISSPACE, npy_bool>,
-            NULL) < 0) {
-        return -1;
-    }
-    if (init_ufunc(
-            umath, "isspace", "templated_string_isspace", 1, 1, dtypes, ENCODING::UTF32,
-            string_unary_loop<ENCODING::UTF32, IMPLEMENTED_UNARY_FUNCTIONS::ISSPACE, npy_bool>,
-            NULL) < 0) {
-        return -1;
-    }
-
-    if (init_ufunc(
-            umath, "isalnum", "templated_string_isalnum", 1, 1, dtypes, ENCODING::ASCII,
-            string_unary_loop<ENCODING::ASCII, IMPLEMENTED_UNARY_FUNCTIONS::ISALNUM, npy_bool>,
-            NULL) < 0) {
-        return -1;
-    }
-    if (init_ufunc(
-            umath, "isalnum", "templated_string_isalnum", 1, 1, dtypes, ENCODING::UTF32,
-            string_unary_loop<ENCODING::UTF32, IMPLEMENTED_UNARY_FUNCTIONS::ISALNUM, npy_bool>,
-            NULL) < 0) {
-        return -1;
-    }
-
-    if (init_ufunc(
-            umath, "islower", "templated_string_islower", 1, 1, dtypes, ENCODING::ASCII,
-            string_unary_loop<ENCODING::ASCII, IMPLEMENTED_UNARY_FUNCTIONS::ISLOWER, npy_bool>,
-            NULL) < 0) {
-        return -1;
-    }
-    if (init_ufunc(
-            umath, "islower", "templated_string_islower", 1, 1, dtypes, ENCODING::UTF32,
-            string_unary_loop<ENCODING::UTF32, IMPLEMENTED_UNARY_FUNCTIONS::ISLOWER, npy_bool>,
-            NULL) < 0) {
-        return -1;
-    }
-
-    if (init_ufunc(
-            umath, "isupper", "templated_string_isupper", 1, 1, dtypes, ENCODING::ASCII,
-            string_unary_loop<ENCODING::ASCII, IMPLEMENTED_UNARY_FUNCTIONS::ISUPPER, npy_bool>,
-            NULL) < 0) {
-        return -1;
-    }
-    if (init_ufunc(
-            umath, "isupper", "templated_string_isupper", 1, 1, dtypes, ENCODING::UTF32,
-            string_unary_loop<ENCODING::UTF32, IMPLEMENTED_UNARY_FUNCTIONS::ISUPPER, npy_bool>,
-            NULL) < 0) {
-        return -1;
-    }
-
-    if (init_ufunc(
-            umath, "istitle", "templated_string_istitle", 1, 1, dtypes, ENCODING::ASCII,
-            string_unary_loop<ENCODING::ASCII, IMPLEMENTED_UNARY_FUNCTIONS::ISTITLE, npy_bool>,
-            NULL) < 0) {
-        return -1;
-    }
-    if (init_ufunc(
-            umath, "istitle", "templated_string_istitle", 1, 1, dtypes, ENCODING::UTF32,
-            string_unary_loop<ENCODING::UTF32, IMPLEMENTED_UNARY_FUNCTIONS::ISTITLE, npy_bool>,
-            NULL) < 0) {
-        return -1;
-    }
-
-    if (init_ufunc(
-            umath, "isdecimal", "templated_string_isdecimal", 1, 1, dtypes, ENCODING::UTF32,
-            string_unary_loop<ENCODING::UTF32, IMPLEMENTED_UNARY_FUNCTIONS::ISDECIMAL, npy_bool>,
-            NULL) < 0) {
-        return -1;
-    }
-
-    if (init_ufunc(
-            umath, "isnumeric", "templated_string_isnumeric", 1, 1, dtypes, ENCODING::UTF32,
-            string_unary_loop<ENCODING::UTF32, IMPLEMENTED_UNARY_FUNCTIONS::ISNUMERIC, npy_bool>,
-            NULL) < 0) {
-        return -1;
-    }
-
     dtypes[0] = dtypes[1] = NPY_OBJECT;
     if (init_ufunc(
             umath, "_lstrip_whitespace", 1, 1, dtypes, ENCODING::ASCII,

From 2bc79d796f66312de7ba051ac3bf55c1b355b031 Mon Sep 17 00:00:00 2001
From: Lysandros Nikolaou <lisandrosnik@gmail.com>
Date: Fri, 16 Feb 2024 11:50:27 +0100
Subject: [PATCH 7/9] Implement NumPyOS_ascii_islower/isupper and use those

---
 numpy/_core/src/common/numpyos.c      | 22 ++++++++++++++++++++++
 numpy/_core/src/common/numpyos.h      |  6 ++++++
 numpy/_core/src/umath/string_buffer.h |  4 ++--
 3 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/numpy/_core/src/common/numpyos.c b/numpy/_core/src/common/numpyos.c
index 4b589966591e..38c77616daa2 100644
--- a/numpy/_core/src/common/numpyos.c
+++ b/numpy/_core/src/common/numpyos.c
@@ -387,6 +387,28 @@ NumPyOS_ascii_isalnum(char c)
     return NumPyOS_ascii_isdigit(c) || NumPyOS_ascii_isalpha(c);
 }
 
+/*
+ * NumPyOS_ascii_islower:
+ *
+ * Same as islower under C locale
+ */
+NPY_NO_EXPORT int
+NumPyOS_ascii_islower(char c)
+{
+    return c >= 'a' && c <= 'z';
+}
+
+/*
+ * NumPyOS_ascii_isupper:
+ *
+ * Same as isupper under C locale
+ */
+NPY_NO_EXPORT int
+NumPyOS_ascii_isupper(char c)
+{
+    return c >= 'A' && c <= 'Z';
+}
+
 
 /*
  * NumPyOS_ascii_tolower:
diff --git a/numpy/_core/src/common/numpyos.h b/numpy/_core/src/common/numpyos.h
index 2b2b88bdc954..980b8f6c0578 100644
--- a/numpy/_core/src/common/numpyos.h
+++ b/numpy/_core/src/common/numpyos.h
@@ -44,6 +44,12 @@ NumPyOS_ascii_isdigit(char c);
 NPY_NO_EXPORT int
 NumPyOS_ascii_isalnum(char c);
 
+NPY_NO_EXPORT int
+NumPyOS_ascii_islower(char c);
+
+NPY_NO_EXPORT int
+NumPyOS_ascii_isupper(char c);
+
 /* Convert a string to an int in an arbitrary base */
 NPY_NO_EXPORT npy_longlong
 NumPyOS_strtoll(const char *str, char **endptr, int base);
diff --git a/numpy/_core/src/umath/string_buffer.h b/numpy/_core/src/umath/string_buffer.h
index b6002b0fddd1..ca8a5b197a51 100644
--- a/numpy/_core/src/umath/string_buffer.h
+++ b/numpy/_core/src/umath/string_buffer.h
@@ -174,7 +174,7 @@ template<>
 inline bool
 codepoint_islower<ENCODING::ASCII>(npy_ucs4 code)
 {
-    return Py_ISLOWER((char) code);
+    return NumPyOS_ascii_islower(code);
 }
 
 template<>
@@ -199,7 +199,7 @@ template<>
 inline bool
 codepoint_isupper<ENCODING::ASCII>(npy_ucs4 code)
 {
-    return Py_ISUPPER((char) code);
+    return NumPyOS_ascii_isupper(code);
 }
 
 template<>

From 1cc073381266a7dabe68ebe31384c68906e33a77 Mon Sep 17 00:00:00 2001
From: Lysandros Nikolaou <lisandrosnik@gmail.com>
Date: Fri, 16 Feb 2024 18:59:36 +0100
Subject: [PATCH 8/9] Address feedback; remove unnecessary assignment & change
 typedef to using

---
 numpy/_core/src/umath/string_buffer.h        | 1 -
 numpy/_core/src/umath/stringdtype_ufuncs.cpp | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/numpy/_core/src/umath/string_buffer.h b/numpy/_core/src/umath/string_buffer.h
index ca8a5b197a51..a3be7b149e9f 100644
--- a/numpy/_core/src/umath/string_buffer.h
+++ b/numpy/_core/src/umath/string_buffer.h
@@ -567,7 +567,6 @@ struct Buffer {
                 if (!previous_is_cased) {
                     return false;
                 }
-                previous_is_cased = true;
                 cased = true;
             }
             else {
diff --git a/numpy/_core/src/umath/stringdtype_ufuncs.cpp b/numpy/_core/src/umath/stringdtype_ufuncs.cpp
index 74b210ac091b..e064235762b3 100644
--- a/numpy/_core/src/umath/stringdtype_ufuncs.cpp
+++ b/numpy/_core/src/umath/stringdtype_ufuncs.cpp
@@ -626,7 +626,7 @@ string_intp_output_resolve_descriptors(
     return NPY_NO_CASTING;
 }
 
-typedef bool (Buffer<ENCODING::UTF8>::*utf8_buffer_method)();
+using utf8_buffer_method = bool (Buffer<ENCODING::UTF8>::*)();
 
 static int
 string_bool_output_unary_strided_loop(

From 2214aeed4e5bb78a542eada3a7753c844468511e Mon Sep 17 00:00:00 2001
From: Lysandros Nikolaou <lisandrosnik@gmail.com>
Date: Fri, 16 Feb 2024 22:34:05 +0100
Subject: [PATCH 9/9] Mark some is* tests as xfail under PyPy on Windows

---
 numpy/_core/tests/test_strings.py | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/numpy/_core/tests/test_strings.py b/numpy/_core/tests/test_strings.py
index 00357e67d49c..e6dd3552010e 100644
--- a/numpy/_core/tests/test_strings.py
+++ b/numpy/_core/tests/test_strings.py
@@ -1,9 +1,10 @@
+import sys
 import pytest
 
 import operator
 import numpy as np
 
-from numpy.testing import assert_array_equal, assert_raises
+from numpy.testing import assert_array_equal, assert_raises, IS_PYPY
 
 
 COMPARISONS = [
@@ -719,7 +720,10 @@ def test_replace_unicode(self, dt):
         '\U0001D7F6',
         '\U00011066',
         '\U000104A0',
-        '\U0001F107',
+        pytest.param('\U0001F107', marks=pytest.mark.xfail(
+            sys.platform == 'win32' and IS_PYPY,
+            reason="PYPY bug in Py_UNICODE_ISALNUM",
+            strict=True)),
     ])
     def test_isalnum_unicode(self, in_, dt):
         in_ = np.array(in_, dtype=dt)
@@ -733,7 +737,10 @@ def test_isalnum_unicode(self, in_, dt):
         ('\U0001F40D', False),
         ('\U0001F46F', False),
         ('\u2177', True),
-        ('\U00010429', True),
+        pytest.param('\U00010429', True, marks=pytest.mark.xfail(
+            sys.platform == 'win32' and IS_PYPY,
+            reason="PYPY bug in Py_UNICODE_ISLOWER",
+            strict=True)),
         ('\U0001044E', True),
     ])
     def test_islower_unicode(self, in_, out, dt):
@@ -748,7 +755,10 @@ def test_islower_unicode(self, in_, out, dt):
         ('\U0001F40D', False),
         ('\U0001F46F', False),
         ('\u2177', False),
-        ('\U00010429', False),
+        pytest.param('\U00010429', False, marks=pytest.mark.xfail(
+            sys.platform == 'win32' and IS_PYPY,
+            reason="PYPY bug in Py_UNICODE_ISUPPER",
+            strict=True)),
         ('\U0001044E', False),
     ])
     def test_isupper_unicode(self, in_, out, dt):
@@ -758,9 +768,15 @@ def test_isupper_unicode(self, in_, out, dt):
     @pytest.mark.parametrize("in_,out", [
         ('\u1FFc', True),
         ('Greek \u1FFcitlecases ...', True),
-        ('\U00010401\U00010429', True),
+        pytest.param('\U00010401\U00010429', True, marks=pytest.mark.xfail(
+            sys.platform == 'win32' and IS_PYPY,
+            reason="PYPY bug in Py_UNICODE_ISTITLE",
+            strict=True)),
         ('\U00010427\U0001044E', True),
-        ('\U00010429', False),
+        pytest.param('\U00010429', False, marks=pytest.mark.xfail(
+            sys.platform == 'win32' and IS_PYPY,
+            reason="PYPY bug in Py_UNICODE_ISTITLE",
+            strict=True)),
         ('\U0001044E', False),
         ('\U0001F40D', False),
         ('\U0001F46F', False),