Skip to content

Commit

Permalink
pythongh-113796: Add more validation checks in the csv.Dialect constr…
Browse files Browse the repository at this point in the history
…uctor

ValueError is now raised if the same character is used in different roles.
  • Loading branch information
serhiy-storchaka committed Jan 7, 2024
1 parent f19b93f commit a1ee894
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 13 deletions.
67 changes: 54 additions & 13 deletions Lib/test/test_csv.py
Expand Up @@ -28,14 +28,20 @@ class Test_Csv(unittest.TestCase):
in TestDialectRegistry.
"""
def _test_arg_valid(self, ctor, arg):
ctor(arg)
self.assertRaises(TypeError, ctor)
self.assertRaises(TypeError, ctor, None)
self.assertRaises(TypeError, ctor, arg, bad_attr = 0)
self.assertRaises(TypeError, ctor, arg, delimiter = 0)
self.assertRaises(TypeError, ctor, arg, delimiter = 'XX')
self.assertRaises(TypeError, ctor, arg, bad_attr=0)
self.assertRaises(TypeError, ctor, arg, delimiter='')
self.assertRaises(TypeError, ctor, arg, escapechar='')
self.assertRaises(TypeError, ctor, arg, quotechar='')
self.assertRaises(TypeError, ctor, arg, delimiter='^^')
self.assertRaises(TypeError, ctor, arg, escapechar='^^')
self.assertRaises(TypeError, ctor, arg, quotechar='^^')
self.assertRaises(csv.Error, ctor, arg, 'foo')
self.assertRaises(TypeError, ctor, arg, delimiter=None)
self.assertRaises(TypeError, ctor, arg, delimiter=1)
self.assertRaises(TypeError, ctor, arg, escapechar=1)
self.assertRaises(TypeError, ctor, arg, quotechar=1)
self.assertRaises(TypeError, ctor, arg, lineterminator=None)
self.assertRaises(TypeError, ctor, arg, lineterminator=1)
Expand All @@ -46,6 +52,40 @@ def _test_arg_valid(self, ctor, arg):
quoting=csv.QUOTE_ALL, quotechar=None)
self.assertRaises(TypeError, ctor, arg,
quoting=csv.QUOTE_NONE, quotechar='')
self.assertRaises(ValueError, ctor, arg, delimiter='\n')
self.assertRaises(ValueError, ctor, arg, escapechar='\n')
self.assertRaises(ValueError, ctor, arg, quotechar='\n')
self.assertRaises(ValueError, ctor, arg, delimiter='\r')
self.assertRaises(ValueError, ctor, arg, escapechar='\r')
self.assertRaises(ValueError, ctor, arg, quotechar='\r')
ctor(arg, delimiter=' ')
ctor(arg, escapechar=' ')
ctor(arg, quotechar=' ')
ctor(arg, delimiter='\t', skipinitialspace=True)
ctor(arg, escapechar='\t', skipinitialspace=True)
ctor(arg, quotechar='\t', skipinitialspace=True)
self.assertRaises(ValueError, ctor, arg,
delimiter=' ', skipinitialspace=True)
self.assertRaises(ValueError, ctor, arg,
escapechar=' ', skipinitialspace=True)
self.assertRaises(ValueError, ctor, arg,
quotechar=' ', skipinitialspace=True)
ctor(arg, delimiter='^')
ctor(arg, escapechar='^')
ctor(arg, quotechar='^')
self.assertRaises(ValueError, ctor, arg, delimiter='^', escapechar='^')
self.assertRaises(ValueError, ctor, arg, delimiter='^', quotechar='^')
self.assertRaises(ValueError, ctor, arg, escapechar='^', quotechar='^')
ctor(arg, delimiter='\x85')
ctor(arg, escapechar='\x85')
ctor(arg, quotechar='\x85')
ctor(arg, lineterminator='\x85')
self.assertRaises(ValueError, ctor, arg,
delimiter='\x85', lineterminator='\x85')
self.assertRaises(ValueError, ctor, arg,
escapechar='\x85', lineterminator='\x85')
self.assertRaises(ValueError, ctor, arg,
quotechar='\x85', lineterminator='\x85')

def test_reader_arg_valid(self):
self._test_arg_valid(csv.reader, [])
Expand Down Expand Up @@ -530,14 +570,6 @@ class unspecified():
finally:
csv.unregister_dialect('testC')

def test_bad_dialect(self):
# Unknown parameter
self.assertRaises(TypeError, csv.reader, [], bad_attr = 0)
# Bad values
self.assertRaises(TypeError, csv.reader, [], delimiter = None)
self.assertRaises(TypeError, csv.reader, [], quoting = -1)
self.assertRaises(TypeError, csv.reader, [], quoting = 100)

def test_copy(self):
for name in csv.list_dialects():
dialect = csv.get_dialect(name)
Expand Down Expand Up @@ -1083,10 +1115,15 @@ class mydialect(csv.Dialect):
'"lineterminator" must be a string')

def test_invalid_chars(self):
def create_invalid(field_name, value):
def create_invalid(field_name, value, **kwargs):
class mydialect(csv.Dialect):
pass
delimiter = ','
quoting = csv.QUOTE_ALL
quotechar = '"'
lineterminator = '\r\n'
setattr(mydialect, field_name, value)
for field_name, value in kwargs.items():
setattr(mydialect, field_name, value)
d = mydialect()

for field_name in ("delimiter", "escapechar", "quotechar"):
Expand All @@ -1095,6 +1132,10 @@ class mydialect(csv.Dialect):
self.assertRaises(csv.Error, create_invalid, field_name, "abc")
self.assertRaises(csv.Error, create_invalid, field_name, b'x')
self.assertRaises(csv.Error, create_invalid, field_name, 5)
self.assertRaises(ValueError, create_invalid, field_name, "\n")
self.assertRaises(ValueError, create_invalid, field_name, "\r")
self.assertRaises(ValueError, create_invalid, field_name, " ",
skipinitialspace=True)


class TestSniffer(unittest.TestCase):
Expand Down
@@ -0,0 +1,3 @@
Add more validation checks in the :class:`csv.Dialect` constructor.
:exc:`ValueError` is now raised if the same character is used in different
roles.
39 changes: 39 additions & 0 deletions Modules/_csv.c
Expand Up @@ -333,6 +333,33 @@ dialect_check_quoting(int quoting)
return -1;
}

static int
dialect_check_char(const char *name, Py_UCS4 c, DialectObj *dialect)
{
if (c == '\r' || c == '\n' || (dialect->skipinitialspace && c == ' ')) {
PyErr_Format(PyExc_ValueError, "bad %s value", name);
return -1;
}
if (PyUnicode_FindChar(
dialect->lineterminator, c, 0,
PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0)
{
PyErr_Format(PyExc_ValueError, "bad %s or lineterminator value", name);
return -1;
}
return 0;
}

static int
dialect_check_chars(const char *name1, const char *name2, Py_UCS4 c1, Py_UCS4 c2)
{
if (c1 == c2 && c1 != NOT_SET) {
PyErr_Format(PyExc_ValueError, "bad %s or %s value", name1, name2);
return -1;
}
return 0;
}

#define D_OFF(x) offsetof(DialectObj, x)

static struct PyMemberDef Dialect_memberlist[] = {
Expand Down Expand Up @@ -510,6 +537,18 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
goto err;
}
if (dialect_check_char("delimiter", self->delimiter, self) ||
dialect_check_char("escapechar", self->escapechar, self) ||
dialect_check_char("quotechar", self->quotechar, self) ||
dialect_check_chars("delimiter", "escapechar",
self->delimiter, self->escapechar) ||
dialect_check_chars("delimiter", "quotechar",
self->delimiter, self->quotechar) ||
dialect_check_chars("escapechar", "quotechar",
self->escapechar, self->quotechar))
{
goto err;
}

ret = Py_NewRef(self);
err:
Expand Down

0 comments on commit a1ee894

Please sign in to comment.