Skip to content

Commit

Permalink
MAINT,TST,BUG: Simplify streamer init, fix issues, and add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
seberg committed Jan 15, 2022
1 parent 08fa5ce commit 73940d6
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 54 deletions.
1 change: 0 additions & 1 deletion numpy/core/src/multiarray/textreading/readtext.c
Expand Up @@ -214,7 +214,6 @@ _load_from_filelike(PyObject *NPY_UNUSED(mod),
s = stream_python_iterable(file, encoding);
}
if (s == NULL) {
PyErr_Format(PyExc_RuntimeError, "Unable to access the file.");
PyMem_FREE(usecols);
return NULL;
}
Expand Down
14 changes: 12 additions & 2 deletions numpy/core/src/multiarray/textreading/stream.h
Expand Up @@ -15,8 +15,18 @@
#define BUFFER_IS_LINEND 1
#define BUFFER_IS_FILEEND 2

/*
* Base struct for streams. We currently have two, a chunked reader for
* filelikes and a line-by-line for any iterable.
* As of writing, the chunked reader was only used for filelikes not already
* opened. That is to preserve the amount read in case of an error exactly.
* If we drop this, we could read it more often (but not when `max_rows` is
* used).
*
* The "streams" can extend this struct to store their own data (so it is
* a very lightweight "object").
*/
typedef struct _stream {
void *stream_data;
int (*stream_nextbuf)(void *sdata, char **start, char **end, int *kind);
// Note that the first argument to stream_close is the stream pointer
// itself, not the stream_data pointer.
Expand All @@ -25,7 +35,7 @@ typedef struct _stream {


#define stream_nextbuf(s, start, end, kind) \
((s)->stream_nextbuf((s)->stream_data, start, end, kind))
((s)->stream_nextbuf((s), start, end, kind))
#define stream_close(s) ((s)->stream_close((s)))

#endif /* NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_STREAM_H_ */
73 changes: 22 additions & 51 deletions numpy/core/src/multiarray/textreading/stream_pyobject.c
Expand Up @@ -19,6 +19,7 @@


typedef struct {
stream stream;
/* The Python file object being read. */
PyObject *file;

Expand Down Expand Up @@ -111,14 +112,13 @@ fb_nextbuf(python_chunks_from_file *fb, char **start, char **end, int *kind)
static int
fb_del(stream *strm)
{
python_chunks_from_file *fb = (python_chunks_from_file *)strm->stream_data;
python_chunks_from_file *fb = (python_chunks_from_file *)strm;

Py_XDECREF(fb->file);
Py_XDECREF(fb->read);
Py_XDECREF(fb->chunksize);
Py_XDECREF(fb->chunk);

PyMem_FREE(fb);
PyMem_FREE(strm);

return 0;
Expand All @@ -129,29 +129,19 @@ NPY_NO_EXPORT stream *
stream_python_file(PyObject *obj, const char *encoding)
{
python_chunks_from_file *fb;
stream *strm;

fb = (python_chunks_from_file *)PyMem_MALLOC(sizeof(python_chunks_from_file));
fb = (python_chunks_from_file *)PyMem_Calloc(1, sizeof(python_chunks_from_file));
if (fb == NULL) {
PyErr_NoMemory();
return NULL;
}

fb->file = NULL;
fb->read = NULL;
fb->chunksize = NULL;
fb->chunk = NULL;
fb->encoding = encoding;

strm = (stream *)PyMem_MALLOC(sizeof(stream));
if (strm == NULL) {
PyErr_NoMemory();
PyMem_FREE(fb);
return NULL;
}
fb->stream.stream_nextbuf = (void *)&fb_nextbuf;
fb->stream.stream_close = &fb_del;

fb->encoding = encoding;
Py_INCREF(obj);
fb->file = obj;
Py_INCREF(fb->file);

fb->read = PyObject_GetAttrString(obj, "read");
if (fb->read == NULL) {
Expand All @@ -162,14 +152,10 @@ stream_python_file(PyObject *obj, const char *encoding)
goto fail;
}

strm->stream_data = (void *)fb;
strm->stream_nextbuf = (void *)&fb_nextbuf;
strm->stream_close = &fb_del;

return strm;
return (stream *)fb;

fail:
fb_del(strm);
fb_del((stream *)fb);
return NULL;
}

Expand All @@ -178,6 +164,7 @@ stream_python_file(PyObject *obj, const char *encoding)
* Stream from a Python iterable by interpreting each item as a line in a file
*/
typedef struct {
stream stream;
/* The Python file object being read. */
PyObject *iterator;

Expand All @@ -192,14 +179,12 @@ typedef struct {
static int
it_del(stream *strm)
{
python_lines_from_iterator *it = (python_lines_from_iterator *)strm->stream_data;
python_lines_from_iterator *it = (python_lines_from_iterator *)strm;

Py_XDECREF(it->iterator);
Py_XDECREF(it->line);

PyMem_FREE(it);
PyMem_FREE(strm);

return 0;
}

Expand Down Expand Up @@ -233,39 +218,25 @@ NPY_NO_EXPORT stream *
stream_python_iterable(PyObject *obj, const char *encoding)
{
python_lines_from_iterator *it;
stream *strm;

it = (python_lines_from_iterator *)PyMem_MALLOC(sizeof(*it));
if (it == NULL) {
PyErr_NoMemory();
if (!PyIter_Check(obj)) {
PyErr_SetString(PyExc_TypeError,
"error reading from object, expected an iterable.");
return NULL;
}

it->iterator = NULL;
it->line = NULL;
it->encoding = encoding;

strm = (stream *)PyMem_MALLOC(sizeof(stream));
if (strm == NULL) {
it = (python_lines_from_iterator *)PyMem_Calloc(1, sizeof(*it));
if (it == NULL) {
PyErr_NoMemory();
PyMem_FREE(it);
return NULL;
}
if (!PyIter_Check(obj)) {
PyErr_SetString(PyExc_TypeError,
"error reading from object, expected an iterable.");
goto fail;
}
Py_INCREF(obj);
it->iterator = obj;

strm->stream_data = (void *)it;
strm->stream_nextbuf = (void *)&it_nextbuf;
strm->stream_close = &it_del;
it->stream.stream_nextbuf = (void *)&it_nextbuf;
it->stream.stream_close = &it_del;

return strm;
it->encoding = encoding;
Py_INCREF(obj);
it->iterator = obj;

fail:
it_del(strm);
return NULL;
return (stream *)it;
}
45 changes: 45 additions & 0 deletions numpy/lib/tests/test_io.py
Expand Up @@ -3347,3 +3347,48 @@ def test_loadtxt_unicode_whitespace_stripping_complex(dtype):
def test_loadtxt_bad_complex(dtype, field):
with pytest.raises(ValueError):
np.loadtxt([field + "\n"], dtype=dtype, delimiter=",")


def test_loadtxt_iterator_fails_getting_next_line():
class BadSequence:
def __len__(self):
return 100

def __getitem__(self, item):
if item == 50:
raise RuntimeError("Bad things happened!")
return f"{item}, {item+1}"

with pytest.raises(RuntimeError, match="Bad things happened!"):
np.loadtxt(BadSequence(), dtype=int, delimiter=",")


class TestCReaderUnitTests:
# These are internal tests for path that should not be possible to hit
# unless things go very very wrong somewhere.
def test_not_an_filelike(self):
with pytest.raises(AttributeError, match=".*read"):
np.core._multiarray_umath._load_from_filelike(
object(), dtype=np.dtype("i"), filelike=True)

def test_filelike_read_fails(self):
# Can only be reached if loadtxt opens the file, so it is hard to do
# via the public interface (although maybe not impossible considering
# the current "DataClass" backing).
class BadFileLike:
counter = 0
def read(self, size):
self.counter += 1
if self.counter > 20:
raise RuntimeError("Bad bad bad!")
return "1,2,3\n"

with pytest.raises(RuntimeError, match="Bad bad bad!"):
np.core._multiarray_umath._load_from_filelike(
BadFileLike(), dtype=np.dtype("i"), filelike=True)

def test_not_an_iter(self):
with pytest.raises(TypeError,
match="error reading from object, expected an iterable"):
np.core._multiarray_umath._load_from_filelike(
object(), dtype=np.dtype("i"), filelike=False)

0 comments on commit 73940d6

Please sign in to comment.