From efe921cd18fa21f3ea508c7122acb971124eb9cc Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Sat, 30 Aug 2025 22:35:32 +0200 Subject: [PATCH 01/11] csv.writer w/ PyUnicodeWriter --- Modules/_csv.c | 443 ++++++++++++++++++++++--------------------------- 1 file changed, 198 insertions(+), 245 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index 2e04136e0ac657..19c8ae74e15244 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -139,16 +139,9 @@ typedef struct { typedef struct { PyObject_HEAD - PyObject *write; /* write output lines to this file */ - - DialectObj *dialect; /* parsing dialect */ - - Py_UCS4 *rec; /* buffer for parser.join */ - Py_ssize_t rec_size; /* size of allocated record */ - Py_ssize_t rec_len; /* length of record */ - int num_fields; /* number of fields in record */ - - PyObject *error_obj; /* cached error object */ + PyObject *write; /* write output lines to this file */ + DialectObj *dialect; /* parsing dialect */ + PyObject *error_obj; /* cached error object */ } WriterObj; #define _DialectObj_CAST(op) ((DialectObj *)(op)) @@ -1110,197 +1103,127 @@ csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args) /* * WRITER */ -/* ---------------------------------------------------------------- */ -static void -join_reset(WriterObj *self) -{ - self->rec_len = 0; - self->num_fields = 0; -} - -#define MEM_INCR 32768 +static inline int +_is_structural_char(Py_UCS4 c, DialectObj *dialect) { + Py_ssize_t term_len = PyUnicode_GET_LENGTH(dialect->lineterminator); -/* Calculate new record length or append field to record. Return new - * record length. 
- */ -static Py_ssize_t -join_append_data(WriterObj *self, int field_kind, const void *field_data, - Py_ssize_t field_len, int *quoted, - int copy_phase) -{ - DialectObj *dialect = self->dialect; - Py_ssize_t i; - Py_ssize_t rec_len; - -#define INCLEN \ - do {\ - if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \ - goto overflow; \ - } \ - rec_len++; \ - } while(0) - -#define ADDCH(c) \ - do {\ - if (copy_phase) \ - self->rec[rec_len] = c;\ - INCLEN;\ - } while(0) - - rec_len = self->rec_len; - - /* If this is not the first field we need a field separator */ - if (self->num_fields > 0) - ADDCH(dialect->delimiter); - - /* Handle preceding quote */ - if (copy_phase && *quoted) - ADDCH(dialect->quotechar); - - /* Copy/count field data */ - /* If field is null just pass over */ - for (i = 0; field_data && (i < field_len); i++) { - Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i); - int want_escape = 0; - - if (c == dialect->delimiter || - c == dialect->escapechar || - c == dialect->quotechar || - c == '\n' || - c == '\r' || - PyUnicode_FindChar( - dialect->lineterminator, c, 0, - PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) { - if (dialect->quoting == QUOTE_NONE) - want_escape = 1; - else { - if (c == dialect->quotechar) { - if (dialect->doublequote) - ADDCH(dialect->quotechar); - else - want_escape = 1; - } - else if (c == dialect->escapechar) { - want_escape = 1; - } - if (!want_escape) - *quoted = 1; - } - if (want_escape) { - if (dialect->escapechar == NOT_SET) { - PyErr_Format(self->error_obj, - "need to escape, but no escapechar set"); - return -1; - } - ADDCH(dialect->escapechar); - } - } - /* Copy field character into record buffer. 
- */ - ADDCH(c); + if (c == dialect->delimiter || c == '\n' || c == '\r') { + return 1; } - - if (*quoted) { - if (copy_phase) - ADDCH(dialect->quotechar); - else { - INCLEN; /* starting quote */ - INCLEN; /* ending quote */ - } + if (term_len > 0 && PyUnicode_FindChar(dialect->lineterminator, c, 0, term_len, 1) >= 0) { + return 1; } - return rec_len; - - overflow: - PyErr_NoMemory(); - return -1; -#undef ADDCH -#undef INCLEN -} - -static int -join_check_rec_size(WriterObj *self, Py_ssize_t rec_len) -{ - assert(rec_len >= 0); - - if (rec_len > self->rec_size) { - size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR; - Py_UCS4 *rec_new = self->rec; - PyMem_Resize(rec_new, Py_UCS4, rec_size_new); - if (rec_new == NULL) { - PyErr_NoMemory(); - return 0; - } - self->rec = rec_new; - self->rec_size = (Py_ssize_t)rec_size_new; - } - return 1; + return 0; } static int -join_append(WriterObj *self, PyObject *field, int quoted) +_write_field(PyUnicodeWriter *writer, WriterObj *self, PyObject *field, int *quoted) { DialectObj *dialect = self->dialect; - int field_kind = -1; - const void *field_data = NULL; Py_ssize_t field_len = 0; - Py_ssize_t rec_len; - if (field != NULL) { - field_kind = PyUnicode_KIND(field); - field_data = PyUnicode_DATA(field); + bool is_none = (field == NULL); + + if (!is_none) { + assert(PyUnicode_Check(field)); field_len = PyUnicode_GET_LENGTH(field); + if (field_len < 0) { + return -1; + } } - if (!field_len && dialect->delimiter == ' ' && dialect->skipinitialspace) { + + if (field_len == 0 && dialect->delimiter == ' ' && dialect->skipinitialspace) { if (dialect->quoting == QUOTE_NONE || - (field == NULL && - (dialect->quoting == QUOTE_STRINGS || - dialect->quoting == QUOTE_NOTNULL))) - { - PyErr_Format(self->error_obj, - "empty field must be quoted if delimiter is a space " - "and skipinitialspace is true"); - return 0; + (is_none && (dialect->quoting == QUOTE_STRINGS || + dialect->quoting == QUOTE_NOTNULL))) { + 
PyErr_SetString(self->error_obj, + "empty field must be quoted if delimiter is a space and skipinitialspace is true"); + return -1; + } + *quoted = 1; + } + + /* For ANY quoting != QUOTE_NONE, structural characters force quoting */ + if (!*quoted && !is_none && field_len > 0 && dialect->quoting != QUOTE_NONE) { + Py_ssize_t i; + for (i = 0; i < field_len; i++) { + Py_UCS4 c = PyUnicode_READ_CHAR(field, i); + if (_is_structural_char(c, dialect) || + (c == dialect->quotechar && dialect->doublequote)) { + *quoted = 1; + break; + } } - quoted = 1; } - rec_len = join_append_data(self, field_kind, field_data, field_len, - &quoted, 0); - if (rec_len < 0) - return 0; - - /* grow record buffer if necessary */ - if (!join_check_rec_size(self, rec_len)) - return 0; - - self->rec_len = join_append_data(self, field_kind, field_data, field_len, - &quoted, 1); - self->num_fields++; - - return 1; -} -static int -join_append_lineterminator(WriterObj *self) -{ - Py_ssize_t terminator_len, i; - int term_kind; - const void *term_data; + /* open if needed */ + if (*quoted) { + if (PyUnicodeWriter_WriteChar(writer, dialect->quotechar) < 0) { + return -1; + } + } - terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator); - if (terminator_len == -1) - return 0; + if (!is_none && field_len > 0) { + Py_ssize_t i; + for (i = 0; i < field_len; i++) { + Py_UCS4 c = PyUnicode_READ_CHAR(field, i); + + if (dialect->quoting == QUOTE_NONE) { + /* escape structural characters when we cannot quote. */ + if (_is_structural_char(c, dialect) || + c == dialect->escapechar || + c == dialect->quotechar) { + if (dialect->escapechar == NOT_SET) { + PyErr_SetString(self->error_obj, "need to escape, but no escapechar set"); + return -1; + } + if (PyUnicodeWriter_WriteChar(writer, dialect->escapechar) < 0) { + return -1; + } + } + } + else { + /* handle in-field quoting/escaping. 
*/ + if (c == dialect->quotechar) { + if (dialect->doublequote) { + /* double the quote inside a quoted field */ + if (PyUnicodeWriter_WriteChar(writer, dialect->quotechar) < 0) { + return -1; + } + } + else { + /* ...or escape if we cannot double. */ + if (dialect->escapechar == NOT_SET) { + PyErr_SetString(self->error_obj, "need to escape, but no escapechar set"); + return -1; + } + if (PyUnicodeWriter_WriteChar(writer, dialect->escapechar) < 0) { + return -1; + } + } + } + else if (c == dialect->escapechar && dialect->escapechar != NOT_SET) { + /* escape literal escapechar */ + if (PyUnicodeWriter_WriteChar(writer, dialect->escapechar) < 0) { + return -1; + } + } + } - /* grow record buffer if necessary */ - if (!join_check_rec_size(self, self->rec_len + terminator_len)) - return 0; + if (PyUnicodeWriter_WriteChar(writer, c) < 0) { + return -1; + } + } + } - term_kind = PyUnicode_KIND(self->dialect->lineterminator); - term_data = PyUnicode_DATA(self->dialect->lineterminator); - for (i = 0; i < terminator_len; i++) - self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i); - self->rec_len += terminator_len; + /* close if needed */ + if (*quoted) { + if (PyUnicodeWriter_WriteChar(writer, dialect->quotechar) < 0) { + return -1; + } + } - return 1; + return 0; } PyDoc_STRVAR(csv_writerow_doc, @@ -1315,7 +1238,12 @@ csv_writerow(PyObject *op, PyObject *seq) WriterObj *self = _WriterObj_CAST(op); DialectObj *dialect = self->dialect; PyObject *iter, *field, *line, *result; - bool null_field = false; + PyUnicodeWriter *writer = NULL; + Py_ssize_t field_count = 0; + + bool first_field_was_empty_like = false; + bool first_field_was_none = false; + bool first_field_was_quoted_in_loop = false; iter = PyObject_GetIter(seq); if (iter == NULL) { @@ -1327,90 +1255,124 @@ csv_writerow(PyObject *op, PyObject *seq) return NULL; } - /* Join all fields in internal buffer. 
- */ - join_reset(self); + writer = PyUnicodeWriter_Create(0); + if (writer == NULL) { + Py_DECREF(iter); + return NULL; + } + while ((field = PyIter_Next(iter))) { - int append_ok; int quoted; + PyObject *str_field = NULL; + bool is_none = (field == Py_None); + + if (field_count > 0) { + if (PyUnicodeWriter_WriteChar(writer, dialect->delimiter) < 0) { + Py_DECREF(field); + goto error; + } + } switch (dialect->quoting) { - case QUOTE_NONNUMERIC: - quoted = !PyNumber_Check(field); - break; - case QUOTE_ALL: - quoted = 1; - break; - case QUOTE_STRINGS: - quoted = PyUnicode_Check(field); - break; - case QUOTE_NOTNULL: - quoted = field != Py_None; - break; - default: - quoted = 0; - break; + case QUOTE_NONNUMERIC: + quoted = !PyNumber_Check(field); + break; + case QUOTE_ALL: + quoted = 1; + break; + case QUOTE_STRINGS: + quoted = !is_none && PyUnicode_Check(field); + break; + case QUOTE_NOTNULL: + quoted = !is_none; + break; + default: /* Default QUOTE_MINIMAL or other to minimal behavior */ + quoted = 0; + break; } - null_field = (field == Py_None); - if (PyUnicode_Check(field)) { - append_ok = join_append(self, field, quoted); - Py_DECREF(field); + if (is_none) { + /* None is NULL. */ + str_field = NULL; + } else if (PyUnicode_Check(field)) { + str_field = Py_NewRef(field); + } else { + str_field = PyObject_Str(field); + if (str_field == NULL) { + Py_DECREF(field); + goto error; + } } - else if (null_field) { - append_ok = join_append(self, NULL, quoted); - Py_DECREF(field); + + /* Single empty field special case */ + if (field_count == 0) { + first_field_was_none = is_none; + if (is_none) { + first_field_was_empty_like = true; + } else { + Py_ssize_t len = str_field ? PyUnicode_GET_LENGTH(str_field) : 0; + first_field_was_empty_like = (len == 0); + } } - else { - PyObject *str; - str = PyObject_Str(field); + /* Important side-effect: this may promote to "quoted". 
*/ + if (_write_field(writer, self, str_field, &quoted) < 0) { + Py_XDECREF(str_field); Py_DECREF(field); - if (str == NULL) { - Py_DECREF(iter); - return NULL; - } - append_ok = join_append(self, str, quoted); - Py_DECREF(str); + goto error; } - if (!append_ok) { - Py_DECREF(iter); - return NULL; + + if (field_count == 0) { + first_field_was_quoted_in_loop = (quoted != 0); } + + Py_XDECREF(str_field); + Py_DECREF(field); + field_count++; + } + + if (PyErr_Occurred()) { + goto error; } Py_DECREF(iter); - if (PyErr_Occurred()) - return NULL; + iter = NULL; - if (self->num_fields > 0 && self->rec_len == 0) { + /* Single empty-field rule */ + if (field_count == 1 && first_field_was_empty_like && !first_field_was_quoted_in_loop) { if (dialect->quoting == QUOTE_NONE || - (null_field && + (first_field_was_none && (dialect->quoting == QUOTE_STRINGS || - dialect->quoting == QUOTE_NOTNULL))) - { - PyErr_Format(self->error_obj, - "single empty field record must be quoted"); + dialect->quoting == QUOTE_NOTNULL))) { + PyErr_SetString(self->error_obj, + "single empty field record must be quoted"); + PyUnicodeWriter_Discard(writer); return NULL; } - self->num_fields--; - if (!join_append(self, NULL, 1)) - return NULL; + if (PyUnicodeWriter_WriteChar(writer, dialect->quotechar) < 0) goto error_after_iter; + if (PyUnicodeWriter_WriteChar(writer, dialect->quotechar) < 0) goto error_after_iter; } - /* Add line terminator. 
- */ - if (!join_append_lineterminator(self)) { + if (PyUnicodeWriter_WriteStr(writer, self->dialect->lineterminator) < 0) { +error_after_iter: + PyUnicodeWriter_Discard(writer); return NULL; } - line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, - (void *) self->rec, self->rec_len); + line = PyUnicodeWriter_Finish(writer); + writer = NULL; if (line == NULL) { return NULL; } result = PyObject_CallOneArg(self->write, line); Py_DECREF(line); return result; + +error: + Py_XDECREF(iter); + if (writer) { + PyUnicodeWriter_Discard(writer); + } + return NULL; } PyDoc_STRVAR(csv_writerows_doc, @@ -1487,9 +1449,6 @@ Writer_dealloc(PyObject *op) PyTypeObject *tp = Py_TYPE(self); PyObject_GC_UnTrack(self); tp->tp_clear(op); - if (self->rec != NULL) { - PyMem_Free(self->rec); - } PyObject_GC_Del(self); Py_DECREF(tp); } @@ -1532,12 +1491,6 @@ csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args) self->dialect = NULL; self->write = NULL; - - self->rec = NULL; - self->rec_size = 0; - self->rec_len = 0; - self->num_fields = 0; - self->error_obj = Py_NewRef(module_state->error_obj); if (!PyArg_UnpackTuple(args, "writer", 1, 2, &output_file, &dialect)) { From d5c8539b35baa166110fe6c8162e761a14b931c6 Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Sat, 30 Aug 2025 23:04:28 +0200 Subject: [PATCH 02/11] style; need to turn some vscode ext --- Modules/_csv.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index 19c8ae74e15244..77dbd97403f8f7 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -1274,21 +1274,21 @@ csv_writerow(PyObject *op, PyObject *seq) } switch (dialect->quoting) { - case QUOTE_NONNUMERIC: - quoted = !PyNumber_Check(field); - break; - case QUOTE_ALL: - quoted = 1; - break; - case QUOTE_STRINGS: - quoted = !is_none && PyUnicode_Check(field); - break; - case QUOTE_NOTNULL: - quoted = !is_none; - break; - default: /* Default 
QUOTE_MINIMAL or other to minimal behavior */ - quoted = 0; - break; + case QUOTE_NONNUMERIC: + quoted = !PyNumber_Check(field); + break; + case QUOTE_ALL: + quoted = 1; + break; + case QUOTE_STRINGS: + quoted = !is_none && PyUnicode_Check(field); + break; + case QUOTE_NOTNULL: + quoted = !is_none; + break; + default: /* Default QUOTE_MINIMAL or other to minimal behavior */ + quoted = 0; + break; } if (is_none) { From f6db601a1aff778e1678be2f4d2f52d6e695e0d8 Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Sat, 30 Aug 2025 23:10:18 +0200 Subject: [PATCH 03/11] blurb --- .../next/Library/2025-08-30-23-10-11.gh-issue-138270.wsJJRV.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-08-30-23-10-11.gh-issue-138270.wsJJRV.rst diff --git a/Misc/NEWS.d/next/Library/2025-08-30-23-10-11.gh-issue-138270.wsJJRV.rst b/Misc/NEWS.d/next/Library/2025-08-30-23-10-11.gh-issue-138270.wsJJRV.rst new file mode 100644 index 00000000000000..546e23faf4265e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-08-30-23-10-11.gh-issue-138270.wsJJRV.rst @@ -0,0 +1,2 @@ +Use :c:type:`PyUnicodeWriter` in :func:`csv.writer`, instead of constructing +the buffers manually. 
From 3c3f7ec54e7e8f500f03c6e3b64db3f264e69579 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maurycy=20Paw=C5=82owski-Wiero=C5=84ski?= <5383+maurycy@users.noreply.github.com> Date: Sun, 31 Aug 2025 02:52:49 +0200 Subject: [PATCH 04/11] pep7, redundant comments --- Modules/_csv.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index 77dbd97403f8f7..fefcb2f5b7389c 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -1286,7 +1286,7 @@ csv_writerow(PyObject *op, PyObject *seq) case QUOTE_NOTNULL: quoted = !is_none; break; - default: /* Default QUOTE_MINIMAL or other to minimal behavior */ + default: quoted = 0; break; } @@ -1294,9 +1294,11 @@ csv_writerow(PyObject *op, PyObject *seq) if (is_none) { /* None is NULL. */ str_field = NULL; - } else if (PyUnicode_Check(field)) { + } + else if (PyUnicode_Check(field)) { str_field = Py_NewRef(field); - } else { + } + else { str_field = PyObject_Str(field); if (str_field == NULL) { Py_DECREF(field); @@ -1309,13 +1311,13 @@ csv_writerow(PyObject *op, PyObject *seq) first_field_was_none = is_none; if (is_none) { first_field_was_empty_like = true; - } else { + } + else { Py_ssize_t len = str_field ? PyUnicode_GET_LENGTH(str_field) : 0; first_field_was_empty_like = (len == 0); } } - /* Important side-effect: this may promote to "quoted". 
*/ if (_write_field(writer, self, str_field, &quoted) < 0) { Py_XDECREF(str_field); Py_DECREF(field); From a9e6d3e15d35a18ee2f00f96f15741f9475de2c0 Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Sun, 31 Aug 2025 02:59:07 +0200 Subject: [PATCH 05/11] defensive check --- Modules/_csv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index fefcb2f5b7389c..fc525e9aec98eb 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -1281,7 +1281,7 @@ csv_writerow(PyObject *op, PyObject *seq) quoted = 1; break; case QUOTE_STRINGS: - quoted = !is_none && PyUnicode_Check(field); + quoted = PyUnicode_Check(field); break; case QUOTE_NOTNULL: quoted = !is_none; From 07ad8d0fa0897d071105cd7d3b5da15bb8c25886 Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Sun, 31 Aug 2025 03:01:01 +0200 Subject: [PATCH 06/11] pep7 --- Modules/_csv.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index fc525e9aec98eb..2cde0731fc9922 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -1350,8 +1350,12 @@ csv_writerow(PyObject *op, PyObject *seq) PyUnicodeWriter_Discard(writer); return NULL; } - if (PyUnicodeWriter_WriteChar(writer, dialect->quotechar) < 0) goto error_after_iter; - if (PyUnicodeWriter_WriteChar(writer, dialect->quotechar) < 0) goto error_after_iter; + if (PyUnicodeWriter_WriteChar(writer, dialect->quotechar) < 0) { + goto error_after_iter; + } + if (PyUnicodeWriter_WriteChar(writer, dialect->quotechar) < 0) { + goto error_after_iter; + } } if (PyUnicodeWriter_WriteStr(writer, self->dialect->lineterminator) < 0) { From 66cf1b063a359783fd6fc42c16bd9c518bf18725 Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Sun, 31 Aug 2025 03:03:36 +0200 Subject: [PATCH 07/11] s/is_none/null_field/ --- Modules/_csv.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git 
a/Modules/_csv.c b/Modules/_csv.c index 2cde0731fc9922..1057edd8d39cc1 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -1264,7 +1264,7 @@ csv_writerow(PyObject *op, PyObject *seq) while ((field = PyIter_Next(iter))) { int quoted; PyObject *str_field = NULL; - bool is_none = (field == Py_None); + bool null_field = (field == Py_None); if (field_count > 0) { if (PyUnicodeWriter_WriteChar(writer, dialect->delimiter) < 0) { @@ -1284,15 +1284,14 @@ csv_writerow(PyObject *op, PyObject *seq) quoted = PyUnicode_Check(field); break; case QUOTE_NOTNULL: - quoted = !is_none; + quoted = !null_field; break; default: quoted = 0; break; } - if (is_none) { - /* None is NULL. */ + if (null_field) { str_field = NULL; } else if (PyUnicode_Check(field)) { @@ -1308,8 +1307,8 @@ csv_writerow(PyObject *op, PyObject *seq) /* Single empty field special case */ if (field_count == 0) { - first_field_was_none = is_none; - if (is_none) { + first_field_was_none = null_field; + if (null_field) { first_field_was_empty_like = true; } else { From dbd0b3dea1d85aaac6eeb2fcdfd273acc1af8b2d Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Sun, 31 Aug 2025 03:08:55 +0200 Subject: [PATCH 08/11] one-line cond --- Modules/_csv.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index 1057edd8d39cc1..396dd576d40ea8 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -1308,13 +1308,7 @@ csv_writerow(PyObject *op, PyObject *seq) /* Single empty field special case */ if (field_count == 0) { first_field_was_none = null_field; - if (null_field) { - first_field_was_empty_like = true; - } - else { - Py_ssize_t len = str_field ? 
PyUnicode_GET_LENGTH(str_field) : 0; - first_field_was_empty_like = (len == 0); - } + first_field_was_empty_like = null_field || !PyUnicode_GET_LENGTH(str_field); } if (_write_field(writer, self, str_field, &quoted) < 0) { From d67716185fe955ef7da1a37ed8752f099289e0e6 Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Sun, 31 Aug 2025 22:54:47 +0200 Subject: [PATCH 09/11] PyUnicodeWriter_Discard no/op for NULL --- Modules/_csv.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index 396dd576d40ea8..3a0fc2134c0985 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -1368,9 +1368,7 @@ csv_writerow(PyObject *op, PyObject *seq) error: Py_XDECREF(iter); - if (writer) { - PyUnicodeWriter_Discard(writer); - } + PyUnicodeWriter_Discard(writer); return NULL; } From 1836417e122ea5a5299937ee7f59687a7e9cff37 Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Sun, 31 Aug 2025 23:12:21 +0200 Subject: [PATCH 10/11] move error_after_iter --- Modules/_csv.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index 3a0fc2134c0985..8bc969c996bbb2 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -1340,8 +1340,7 @@ csv_writerow(PyObject *op, PyObject *seq) dialect->quoting == QUOTE_NOTNULL))) { PyErr_SetString(self->error_obj, "single empty field record must be quoted"); - PyUnicodeWriter_Discard(writer); - return NULL; + goto error_after_iter; } if (PyUnicodeWriter_WriteChar(writer, dialect->quotechar) < 0) { goto error_after_iter; @@ -1352,9 +1351,7 @@ csv_writerow(PyObject *op, PyObject *seq) } if (PyUnicodeWriter_WriteStr(writer, self->dialect->lineterminator) < 0) { -error_after_iter: - PyUnicodeWriter_Discard(writer); - return NULL; + goto error_after_iter; } line = PyUnicodeWriter_Finish(writer); @@ -1368,6 +1365,7 @@ csv_writerow(PyObject *op, PyObject *seq) error: Py_XDECREF(iter); +error_after_iter: 
PyUnicodeWriter_Discard(writer); return NULL; } From 88e4a180ea3a8d700bddaef1299c0a72a9a0e7b4 Mon Sep 17 00:00:00 2001 From: maurycy <5383+maurycy@users.noreply.github.com> Date: Mon, 1 Sep 2025 03:07:41 +0200 Subject: [PATCH 11/11] c'mon --- Modules/_csv.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index 8bc969c996bbb2..a0c1f71f37abc7 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -1104,9 +1104,7 @@ csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args) * WRITER */ static inline int -_is_structural_char(Py_UCS4 c, DialectObj *dialect) { - Py_ssize_t term_len = PyUnicode_GET_LENGTH(dialect->lineterminator); - +_is_structural_char(Py_UCS4 c, DialectObj *dialect, Py_ssize_t term_len) { if (c == dialect->delimiter || c == '\n' || c == '\r') { return 1; } @@ -1120,6 +1118,7 @@ static int _write_field(PyUnicodeWriter *writer, WriterObj *self, PyObject *field, int *quoted) { DialectObj *dialect = self->dialect; + Py_ssize_t term_len = PyUnicode_GET_LENGTH(dialect->lineterminator); Py_ssize_t field_len = 0; bool is_none = (field == NULL); @@ -1148,7 +1147,7 @@ _write_field(PyUnicodeWriter *writer, WriterObj *self, PyObject *field, int *quo Py_ssize_t i; for (i = 0; i < field_len; i++) { Py_UCS4 c = PyUnicode_READ_CHAR(field, i); - if (_is_structural_char(c, dialect) || + if (_is_structural_char(c, dialect, term_len) || (c == dialect->quotechar && dialect->doublequote)) { *quoted = 1; break; @@ -1170,7 +1169,7 @@ _write_field(PyUnicodeWriter *writer, WriterObj *self, PyObject *field, int *quo if (dialect->quoting == QUOTE_NONE) { /* escape structural characters when we cannot quote. */ - if (_is_structural_char(c, dialect) || + if (_is_structural_char(c, dialect, term_len) || c == dialect->escapechar || c == dialect->quotechar) { if (dialect->escapechar == NOT_SET) {