numpy · ngoldbaum · Feb 6, 2023 · Feb 1, 2023 · Feb 1, 2023 · Feb 1, 2023
diff --git a/asciidtype/asciidtype/__init__.py b/asciidtype/asciidtype/__init__.py
@@ -7,4 +7,7 @@
 from .scalar import ASCIIScalar  # isort: skip
 from ._asciidtype_main import ASCIIDType
 
-__all__ = ["ASCIIDType", "ASCIIScalar"]
+__all__ = [
+    "ASCIIDType",
+    "ASCIIScalar",
+]
diff --git a/asciidtype/asciidtype/src/dtype.c b/asciidtype/asciidtype/src/dtype.c
@@ -226,6 +226,82 @@ static PyMemberDef ASCIIDType_members[] = {
         {NULL},
 };
 
+/*
+ * Version number stored in the pickle state by __reduce__ and verified by
+ * __setstate__.
+ */
+static int PICKLE_VERSION = 1;
+
+/*
+ * Implements ASCIIDType.__reduce__ for pickling.
+ *
+ * Returns a new reference to the 3-tuple
+ *     (asciidtype.ASCIIDType, (size,), (PICKLE_VERSION,))
+ * i.e. the callable used to rebuild the dtype, its arguments, and the state
+ * tuple that is validated by __setstate__. Returns NULL with an exception
+ * set if the module import, the attribute lookup, or an allocation fails.
+ *
+ * The second parameter is always NULL for METH_NOARGS functions; it is
+ * declared so the function really has the PyCFunction signature it is cast
+ * to in the method table.
+ */
+static PyObject *
+asciidtype__reduce__(ASCIIDTypeObject *self, PyObject *NPY_UNUSED(args))
+{
+    PyObject *mod, *cls, *ret;
+
+    mod = PyImport_ImportModule("asciidtype");
+    if (mod == NULL) {
+        return NULL;
+    }
+
+    cls = PyObject_GetAttrString(mod, "ASCIIDType");
+    Py_DECREF(mod);
+    if (cls == NULL) {
+        return NULL;
+    }
+
+    /*
+     * Build the whole result in one call so that any allocation failure is
+     * reported as NULL instead of leaving NULL slots inside the tuple.
+     * "O" takes its own reference to cls, so ours is released afterwards.
+     */
+    ret = Py_BuildValue("O(l)(i)", cls, self->size, PICKLE_VERSION);
+    Py_DECREF(cls);
+
+    return ret;
+}
+
+/*
+ * Implements ASCIIDType.__setstate__ for unpickling.
+ *
+ * `args` is the state object produced by __reduce__: a 1-tuple holding the
+ * pickle version as an int. No state is restored on the dtype; the call
+ * only validates the version.
+ *
+ * Returns None on success. Raises SystemError (PyErr_BadInternalCall) if
+ * the state is not a 1-tuple containing an int, OverflowError if the
+ * version does not fit in a C long, and ValueError if it differs from
+ * PICKLE_VERSION.
+ */
+static PyObject *
+asciidtype__setstate__(ASCIIDTypeObject *NPY_UNUSED(self), PyObject *args)
+{
+    /* METH_O passes any object; check the type before using tuple macros. */
+    if (!PyTuple_Check(args) || PyTuple_GET_SIZE(args) != 1 ||
+        !PyLong_Check(PyTuple_GET_ITEM(args, 0))) {
+        PyErr_BadInternalCall();
+        return NULL;
+    }
+
+    long version = PyLong_AsLong(PyTuple_GET_ITEM(args, 0));
+
+    /* -1 is also the error return, so it must be disambiguated. */
+    if (version == -1 && PyErr_Occurred()) {
+        return NULL;
+    }
+
+    if (version != PICKLE_VERSION) {
+        /* version is a long and needs %ld; PICKLE_VERSION is an int. */
+        PyErr_Format(PyExc_ValueError,
+                     "Pickle version mismatch. Got version %ld but expected "
+                     "version %d.",
+                     version, PICKLE_VERSION);
+        return NULL;
+    }
+
+    Py_RETURN_NONE;
+}
+
+/*
+ * Methods exposed on ASCIIDType instances to support pickling. The table is
+ * terminated by an all-NULL sentinel entry.
+ */
+static PyMethodDef ASCIIDType_methods[] = {
+        {
+                .ml_name = "__reduce__",
+                .ml_meth = (PyCFunction)asciidtype__reduce__,
+                .ml_flags = METH_NOARGS,
+                .ml_doc = "Reduction method for an ASCIIDType object",
+        },
+        {
+                .ml_name = "__setstate__",
+                .ml_meth = (PyCFunction)asciidtype__setstate__,
+                .ml_flags = METH_O,
+                .ml_doc = "Unpickle an ASCIIDType object",
+        },
+        {NULL, NULL, 0, NULL},
+};
+
 /*
  * This is the basic things that you need to create a Python Type/Class in C.
  * However, there is a slight difference here because we create a
@@ -242,6 +318,7 @@ PyArray_DTypeMeta ASCIIDType = {
                 .tp_repr = (reprfunc)asciidtype_repr,
                 .tp_str = (reprfunc)asciidtype_repr,
                 .tp_members = ASCIIDType_members,
+                .tp_methods = ASCIIDType_methods,
         }},
         /* rest, filled in during DTypeMeta initialization */
 };

diff --git a/asciidtype/tests/test_asciidtype.py b/asciidtype/tests/test_asciidtype.py
@@ -1,4 +1,7 @@
+import os
+import pickle
 import re
+import tempfile
 
 import numpy as np
 import pytest
@@ -230,3 +233,18 @@ def test_insert_scalar_directly():
     val = arr[0]
     arr[1] = val
     np.testing.assert_array_equal(arr, np.array(["some", "some"], dtype=dtype))
+
+
+def test_pickle():
+    """Round-trip an ASCIIDType array and its dtype through a pickle file."""
+    dtype = ASCIIDType(6)
+    arr = np.array(["this", "is", "an", "array"], dtype=dtype)
+
+    # A temporary directory is removed on exit from the ``with`` block, so
+    # the pickle file is cleaned up even when loading or an assertion fails.
+    with tempfile.TemporaryDirectory() as tmpdir:
+        path = os.path.join(tmpdir, "asciidtype.pkl")
+
+        with open(path, "wb") as f:
+            pickle.dump([arr, dtype], f)
+
+        with open(path, "rb") as f:
+            res = pickle.load(f)
+
+    np.testing.assert_array_equal(arr, res[0])
+    assert res[1] == dtype
diff --git a/stringdtype/stringdtype/__init__.py b/stringdtype/stringdtype/__init__.py
@@ -1,12 +1,11 @@
-"""A dtype for working with string data
+"""A dtype for working with variable-length string data
 
-This is an example usage of the experimental new dtype API
-in Numpy and is not intended for any real purpose.
 """
 
 from .scalar import StringScalar  # isort: skip
 from ._main import StringDType, _memory_usage
 
+
 __all__ = [
     "StringDType",
     "StringScalar",

diff --git a/stringdtype/stringdtype/scalar.py b/stringdtype/stringdtype/scalar.py
@@ -2,11 +2,6 @@
 
 
 class StringScalar(str):
-    def __new__(cls, value, dtype):
-        instance = super().__new__(cls, value)
-        instance.dtype = dtype
-        return instance
-
     def partition(self, sep):
         ret = super().partition(sep)
         return (str(ret[0]), str(ret[1]), str(ret[2]))

diff --git a/stringdtype/stringdtype/src/dtype.c b/stringdtype/stringdtype/src/dtype.c
@@ -19,6 +19,7 @@ new_stringdtype_instance(void)
     new->base.elsize = sizeof(ss *);
     new->base.alignment = _Alignof(ss *);
     new->base.flags |= NPY_NEEDS_INIT;
+    new->base.flags |= NPY_LIST_PICKLE;
 
     return new;
 }
@@ -68,7 +69,7 @@ string_discover_descriptor_from_pyobject(PyArray_DTypeMeta *NPY_UNUSED(cls),
         return NULL;
     }
 
-    PyArray_Descr *ret = (PyArray_Descr *)PyObject_GetAttrString(obj, "dtype");
+    PyArray_Descr *ret = (PyArray_Descr *)new_stringdtype_instance();
     if (ret == NULL) {
         return NULL;
     }
@@ -143,7 +144,7 @@ stringdtype_getitem(StringDTypeObject *descr, char **dataptr)
     }
 
     PyObject *res = PyObject_CallFunctionObjArgs((PyObject *)StringScalar_Type,
-                                                 val_obj, descr, NULL);
+                                                 val_obj, NULL);
 
     if (res == NULL) {
         return NULL;
@@ -200,6 +201,82 @@ stringdtype_repr(StringDTypeObject *NPY_UNUSED(self))
     return PyUnicode_FromString("StringDType()");
 }
 
+/*
+ * Version number stored in the pickle state by __reduce__ and verified by
+ * __setstate__.
+ */
+static int PICKLE_VERSION = 1;
+
+/*
+ * Implements StringDType.__reduce__ for pickling.
+ *
+ * Returns a new reference to the 3-tuple
+ *     (stringdtype.StringDType, (), (PICKLE_VERSION,))
+ * i.e. the callable used to rebuild the dtype, an empty argument tuple, and
+ * the state tuple that is validated by __setstate__. Returns NULL with an
+ * exception set if the module import, the attribute lookup, or an
+ * allocation fails.
+ *
+ * The second parameter is always NULL for METH_NOARGS functions; it is
+ * declared so the function really has the PyCFunction signature it is cast
+ * to in the method table.
+ */
+static PyObject *
+stringdtype__reduce__(StringDTypeObject *NPY_UNUSED(self),
+                      PyObject *NPY_UNUSED(args))
+{
+    PyObject *mod, *cls, *ret;
+
+    mod = PyImport_ImportModule("stringdtype");
+    if (mod == NULL) {
+        return NULL;
+    }
+
+    cls = PyObject_GetAttrString(mod, "StringDType");
+    Py_DECREF(mod);
+    if (cls == NULL) {
+        return NULL;
+    }
+
+    /*
+     * Build the whole result in one call so that any allocation failure is
+     * reported as NULL instead of leaving NULL slots inside the tuple.
+     * "O" takes its own reference to cls, so ours is released afterwards.
+     */
+    ret = Py_BuildValue("O()(i)", cls, PICKLE_VERSION);
+    Py_DECREF(cls);
+
+    return ret;
+}
+
+/*
+ * Implements StringDType.__setstate__ for unpickling.
+ *
+ * `args` is the state object produced by __reduce__: a 1-tuple holding the
+ * pickle version as an int. No state is restored on the dtype; the call
+ * only validates the version.
+ *
+ * Returns None on success. Raises SystemError (PyErr_BadInternalCall) if
+ * the state is not a 1-tuple containing an int, OverflowError if the
+ * version does not fit in a C long, and ValueError if it differs from
+ * PICKLE_VERSION.
+ */
+static PyObject *
+stringdtype__setstate__(StringDTypeObject *NPY_UNUSED(self), PyObject *args)
+{
+    /* METH_O passes any object; check the type before using tuple macros. */
+    if (!PyTuple_Check(args) || PyTuple_GET_SIZE(args) != 1 ||
+        !PyLong_Check(PyTuple_GET_ITEM(args, 0))) {
+        PyErr_BadInternalCall();
+        return NULL;
+    }
+
+    long version = PyLong_AsLong(PyTuple_GET_ITEM(args, 0));
+
+    /* -1 is also the error return, so it must be disambiguated. */
+    if (version == -1 && PyErr_Occurred()) {
+        return NULL;
+    }
+
+    if (version != PICKLE_VERSION) {
+        /* version is a long and needs %ld; PICKLE_VERSION is an int. */
+        PyErr_Format(PyExc_ValueError,
+                     "Pickle version mismatch. Got version %ld but expected "
+                     "version %d.",
+                     version, PICKLE_VERSION);
+        return NULL;
+    }
+
+    Py_RETURN_NONE;
+}
+
+/*
+ * Methods exposed on StringDType instances to support pickling. The table
+ * is terminated by an all-NULL sentinel entry.
+ */
+static PyMethodDef StringDType_methods[] = {
+        {
+                .ml_name = "__reduce__",
+                .ml_meth = (PyCFunction)stringdtype__reduce__,
+                .ml_flags = METH_NOARGS,
+                .ml_doc = "Reduction method for a StringDType object",
+        },
+        {
+                .ml_name = "__setstate__",
+                .ml_meth = (PyCFunction)stringdtype__setstate__,
+                .ml_flags = METH_O,
+                .ml_doc = "Unpickle a StringDType object",
+        },
+        {NULL, NULL, 0, NULL},
+};
+
 /*
  * This is the basic things that you need to create a Python Type/Class in C.
  * However, there is a slight difference here because we create a
@@ -215,6 +292,7 @@ PyArray_DTypeMeta StringDType = {
                 .tp_dealloc = (destructor)stringdtype_dealloc,
                 .tp_repr = (reprfunc)stringdtype_repr,
                 .tp_str = (reprfunc)stringdtype_repr,
+                .tp_methods = StringDType_methods,
         }},
         /* rest, filled in during DTypeMeta initialization */
 };

diff --git a/stringdtype/tests/test_stringdtype.py b/stringdtype/tests/test_stringdtype.py
@@ -1,3 +1,8 @@
+import concurrent.futures
+import os
+import pickle
+import tempfile
+
 import numpy as np
 import pytest
 
@@ -10,7 +15,7 @@ def string_list():
 
 
 def test_scalar_creation():
-    assert str(StringScalar("abc", StringDType())) == "abc"
+    # A StringScalar is built from the string value alone.
+    assert str(StringScalar("abc")) == "abc"
 
 
 def test_dtype_creation():
@@ -38,12 +43,11 @@ def test_array_creation_utf8(data):
 
 
 def test_array_creation_scalars(string_list):
-    dtype = StringDType()
+    # np.array is given no dtype here; the repr of the result must match an
+    # array created from string_list with an explicit StringDType().
     arr = np.array(
         [
-            StringScalar("abc", dtype=dtype),
-            StringScalar("def", dtype=dtype),
-            StringScalar("ghi", dtype=dtype),
+            StringScalar("abc"),
+            StringScalar("def"),
+            StringScalar("ghi"),
         ]
     )
     assert repr(arr) == repr(np.array(string_list, dtype=StringDType()))
@@ -94,7 +98,7 @@ def test_unicode_casts(string_list):
 def test_insert_scalar(string_list):
     dtype = StringDType()
     arr = np.array(string_list, dtype=dtype)
-    arr[1] = StringScalar("what", dtype=dtype)
+    # Assigning a StringScalar into the array replaces the element at index 1.
+    arr[1] = StringScalar("what")
     assert repr(arr) == repr(np.array(["abc", "what", "ghi"], dtype=dtype))
 
 
@@ -124,3 +128,36 @@ def test_memory_usage(string_list):
         _memory_usage("hello")
     with pytest.raises(TypeError):
         _memory_usage(np.array([1, 2, 3]))
+
+
+def _pickle_load(filename):
+    """Unpickle and return the object stored in the file at ``filename``.
+
+    test_pickle submits this function to a process pool to load the pickle
+    in a separate process.
+    """
+    with open(filename, "rb") as stream:
+        return pickle.load(stream)
+
+
+def test_pickle(string_list):
+    """Round-trip a StringDType array and its dtype through a pickle file."""
+    dtype = StringDType()
+
+    arr = np.array(string_list, dtype=dtype)
+
+    # A temporary directory is removed on exit from the ``with`` block, so
+    # the pickle file is cleaned up even when loading or an assertion fails.
+    with tempfile.TemporaryDirectory() as tmpdir:
+        path = os.path.join(tmpdir, "stringdtype.pkl")
+
+        with open(path, "wb") as f:
+            pickle.dump([arr, dtype], f)
+
+        res = _pickle_load(path)
+
+        np.testing.assert_array_equal(res[0], arr)
+        assert res[1] == dtype
+
+        # load the pickle in a subprocess to ensure the string data are
+        # actually stored in the pickle file
+        with concurrent.futures.ProcessPoolExecutor() as executor:
+            res = executor.submit(_pickle_load, path).result()
+
+        np.testing.assert_array_equal(res[0], arr)
+        assert res[1] == dtype