Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[3.11] gh-115712: Support CSV dialects with delimiter=' ' and skipinitialspace=True (GH-115721) (GH-115729) #115738

Merged
merged 2 commits into from Feb 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
61 changes: 56 additions & 5 deletions Lib/test/test_csv.py
Expand Up @@ -46,6 +46,20 @@ def _test_arg_valid(self, ctor, arg):
quoting=csv.QUOTE_ALL, quotechar=None)
self.assertRaises(TypeError, ctor, arg,
quoting=csv.QUOTE_NONE, quotechar='')
ctor(arg, delimiter=' ')
ctor(arg, escapechar=' ')
ctor(arg, quotechar=' ')
ctor(arg, delimiter='\t', skipinitialspace=True)
ctor(arg, escapechar='\t', skipinitialspace=True)
ctor(arg, quotechar='\t', skipinitialspace=True)
ctor(arg, delimiter=' ', skipinitialspace=True)
ctor(arg, delimiter='^')
ctor(arg, escapechar='^')
ctor(arg, quotechar='^')
ctor(arg, delimiter='\x85')
ctor(arg, escapechar='\x85')
ctor(arg, quotechar='\x85')
ctor(arg, lineterminator='\x85')

def test_reader_arg_valid(self):
self._test_arg_valid(csv.reader, [])
Expand Down Expand Up @@ -152,9 +166,6 @@ def _write_error_test(self, exc, fields, **kwargs):

def test_write_arg_valid(self):
self._write_error_test(csv.Error, None)
self._write_test((), '')
self._write_test([None], '""')
self._write_error_test(csv.Error, [None], quoting = csv.QUOTE_NONE)
# Check that exceptions are passed up the chain
self._write_error_test(OSError, BadIterable())
class BadList:
Expand Down Expand Up @@ -271,6 +282,38 @@ def test_writerows_with_none(self):
fileobj.seek(0)
self.assertEqual(fileobj.read(), 'a\r\n""\r\n')

def test_write_empty_fields(self):
    """Exercise writing rows that consist solely of empty values.

    Both the empty string and ``None`` are tried as the "empty" value.
    The expectations encoded here (via the ``_write_test`` and
    ``_write_error_test`` helpers, which are defined elsewhere in the
    test class) are:

    * an empty row produces an empty line;
    * a single empty value is written as ``""``, and is expected to
      raise ``csv.Error`` when ``quoting=csv.QUOTE_NONE``;
    * two empty values are written as a bare delimiter (``,``).
    """
    # A row with no fields at all.
    self._write_test((), '')

    # A row whose only field is empty: quoted by default, rejected when
    # quoting is disabled.
    for blank in ('', None):
        self._write_test([blank], '""')
        self._write_error_test(csv.Error, [blank], quoting=csv.QUOTE_NONE)

    # Two empty fields need no quoting; the delimiter alone is expected.
    for blank in ('', None):
        self._write_test([blank, blank], ',')

def test_write_empty_fields_space_delimiter(self):
    """Exercise writing empty values when the delimiter is a space.

    Both the empty string and ``None`` are tried as the "empty" value,
    each with ``skipinitialspace`` off and on.  The expectations encoded
    here (via the ``_write_test`` and ``_write_error_test`` helpers,
    which are defined elsewhere in the test class) are:

    * a single empty value is always written as ``""``;
    * two empty values are written as a lone space when
      ``skipinitialspace`` is false, and as ``"" ""`` when it is true;
    * with ``quoting=csv.QUOTE_NONE``, two empty values are still a lone
      space when ``skipinitialspace`` is false, but ``csv.Error`` is
      expected when it is true.
    """
    # Single empty field: same quoted output for either setting.
    for blank in ('', None):
        for skip in (False, True):
            self._write_test([blank], '""',
                             delimiter=' ', skipinitialspace=skip)

    # Two empty fields with default quoting.
    for blank in ('', None):
        self._write_test([blank, blank], ' ',
                         delimiter=' ', skipinitialspace=False)
        self._write_test([blank, blank], '"" ""',
                         delimiter=' ', skipinitialspace=True)

    # Two empty fields with quoting disabled.
    for blank in ('', None):
        self._write_test([blank, blank], ' ',
                         delimiter=' ', skipinitialspace=False,
                         quoting=csv.QUOTE_NONE)
        self._write_error_test(csv.Error, [blank, blank],
                               delimiter=' ', skipinitialspace=True,
                               quoting=csv.QUOTE_NONE)

def test_writerows_errors(self):
with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
writer = csv.writer(fileobj)
Expand Down Expand Up @@ -372,6 +415,14 @@ def test_read_skipinitialspace(self):
[['no space', 'space', 'spaces', '\ttab']],
skipinitialspace=True)

def test_read_space_delimiter(self):
    """Exercise reading space-delimited rows with and without skipinitialspace.

    The input lines deliberately contain *runs* of spaces:

    * ``'a   b'``  - three spaces between the letters,
    * ``'  a  '``  - two spaces on each side,
    * ``'  '``     - two spaces only,
    * ``''``       - an empty line.

    With ``skipinitialspace=False`` every space is expected to act as a
    delimiter, so N consecutive spaces yield N + 1 fields.  With
    ``skipinitialspace=True`` the expected rows show spaces at the start
    of a field being dropped.  The comparison itself is done by the
    ``_read_test`` helper, which is defined elsewhere in the test class.
    """
    # The repeated spaces in the input literals are significant: the
    # expected rows below (e.g. four fields for the first line) can only
    # be produced when each run has the width shown here.
    self._read_test(['a   b', '  a  ', '  ', ''],
                    [['a', '', '', 'b'], ['', '', 'a', '', ''], ['', '', ''], []],
                    delimiter=' ', skipinitialspace=False)
    self._read_test(['a   b', '  a  ', '  ', ''],
                    [['a', 'b'], ['a', ''], [''], []],
                    delimiter=' ', skipinitialspace=True)

def test_read_bigfield(self):
# This exercises the buffer realloc functionality and field size
# limits.
Expand Down Expand Up @@ -498,10 +549,10 @@ class space(csv.excel):
escapechar = "\\"

with TemporaryFile("w+", encoding="utf-8") as fileobj:
fileobj.write("abc def\nc1ccccc1 benzene\n")
fileobj.write("abc def\nc1ccccc1 benzene\n")
fileobj.seek(0)
reader = csv.reader(fileobj, dialect=space())
self.assertEqual(next(reader), ["abc", "def"])
self.assertEqual(next(reader), ["abc", "", "", "def"])
self.assertEqual(next(reader), ["c1ccccc1", "benzene"])

def compare_dialect_123(self, expected, *writeargs, **kwwriteargs):
Expand Down
@@ -0,0 +1,3 @@
:func:`csv.writer()` now quotes empty fields if the delimiter is a
space and skipinitialspace is true, and raises an exception if quoting is
not possible.
10 changes: 10 additions & 0 deletions Modules/_csv.c
Expand Up @@ -1180,6 +1180,7 @@ join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
static int
join_append(WriterObj *self, PyObject *field, int quoted)
{
DialectObj *dialect = self->dialect;
unsigned int field_kind = -1;
const void *field_data = NULL;
Py_ssize_t field_len = 0;
Expand All @@ -1192,6 +1193,15 @@ join_append(WriterObj *self, PyObject *field, int quoted)
field_data = PyUnicode_DATA(field);
field_len = PyUnicode_GET_LENGTH(field);
}
if (!field_len && dialect->delimiter == ' ' && dialect->skipinitialspace) {
if (dialect->quoting == QUOTE_NONE) {
PyErr_Format(self->error_obj,
"empty field must be quoted if delimiter is a space "
"and skipinitialspace is true");
return 0;
}
quoted = 1;
}
rec_len = join_append_data(self, field_kind, field_data, field_len,
&quoted, 0);
if (rec_len < 0)
Expand Down