From a796f9d5e219d5352379afd5faddab2cbacf6ddb Mon Sep 17 00:00:00 2001 From: Jan-Eric Nitschke Date: Sun, 7 Sep 2025 17:01:17 +0200 Subject: [PATCH 1/5] Increase test coverage for csv.DictReader and csv.Sniffer Previously there were no tests for the DictReader fieldnames setter, the case where a StopIteration was encountered when trying to determine the fieldnames from the content or the case where Sniffer could not find a delimiter. --- Lib/test/test_csv.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 60feab225a107c..0fbd524865c565 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -918,6 +918,13 @@ def test_dict_reader_fieldnames_accepts_list(self): reader = csv.DictReader(f, fieldnames) self.assertEqual(reader.fieldnames, fieldnames) + def test_dict_reader_set_fieldnames(self): + fieldnames = ["a", "b", "c"] + f = StringIO() + reader = csv.DictReader(f) + reader.fieldnames = fieldnames + self.assertEqual(reader.fieldnames, fieldnames) + def test_dict_writer_fieldnames_rejects_iter(self): fieldnames = ["a", "b", "c"] f = StringIO() @@ -933,6 +940,7 @@ def test_dict_writer_fieldnames_accepts_list(self): def test_dict_reader_fieldnames_is_optional(self): f = StringIO() reader = csv.DictReader(f, fieldnames=None) + self.assertEqual(reader.fieldnames, None) def test_read_dict_fields(self): with TemporaryFile("w+", encoding="utf-8") as fileobj: @@ -951,7 +959,7 @@ def test_read_dict_no_fieldnames(self): self.assertEqual(reader.fieldnames, ["f1", "f2", "f3"]) # Two test cases to make sure existing ways of implicitly setting - # fieldnames continue to work. Both arise from discussion in issue3436. + # fieldnames continue to work. Both arise from discussion in issue3436. def test_read_dict_fieldnames_from_file(self): with TemporaryFile("w+", encoding="utf-8") as fileobj: fileobj.write("f1,f2,f3\r\n1,2,abc\r\n") @@ -1353,6 +1361,9 @@ class TestSniffer(unittest.TestCase): ghi\0jkl """ + sample15 = "\n\n\n" + sample16 = "abc\ndef\nghi" + def test_issue43625(self): sniffer = csv.Sniffer() self.assertTrue(sniffer.has_header(self.sample12)) @@ -1423,6 +1434,11 @@ def test_delimiters(self): self.assertEqual(dialect.quotechar, "'") dialect = sniffer.sniff(self.sample14) self.assertEqual(dialect.delimiter, '\0') + self.assertRaisesRegex(csv.Error, "Could not determine delimiter", + sniffer.sniff, self.sample15) + self.assertRaisesRegex(csv.Error, "Could not determine delimiter", + sniffer.sniff, self.sample16) + def test_doublequote(self): sniffer = csv.Sniffer() From 22346fbd8dd6c564915c9cbf51746f272af2c474 Mon Sep 17 00:00:00 2001 From: Jan-Eric Nitschke Date: Sun, 7 Sep 2025 19:04:57 +0200 Subject: [PATCH 2/5] Revert whitespace change to comment --- Lib/test/test_csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 0fbd524865c565..faa3f0afb55939 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -959,7 +959,7 @@ def test_read_dict_no_fieldnames(self): self.assertEqual(reader.fieldnames, ["f1", "f2", "f3"]) # Two test cases to make sure existing ways of implicitly setting - # fieldnames continue to work. Both arise from discussion in issue3436. + # fieldnames continue to work. Both arise from discussion in issue3436. def test_read_dict_fieldnames_from_file(self): with TemporaryFile("w+", encoding="utf-8") as fileobj: fileobj.write("f1,f2,f3\r\n1,2,abc\r\n") From 9d6d36f82334dd69985a93876dd923fa7d24c3e5 Mon Sep 17 00:00:00 2001 From: Jan-Eric Nitschke Date: Mon, 8 Sep 2025 09:31:24 +0200 Subject: [PATCH 3/5] Add a test that csv.Sniffer.has_header checks up to 20 rows --- Lib/test/test_csv.py | 55 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index faa3f0afb55939..9f8913e1edd7d5 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -1364,6 +1364,56 @@ class TestSniffer(unittest.TestCase): sample15 = "\n\n\n" sample16 = "abc\ndef\nghi" + sample17 = """\ +name,age +Alice,30 +Bob,40 +Carol,50 +Dave,60 +Eve,70 +Frank,80 +Grace,90 +Heidi,100 +Ivan,110 +Judy,120 +Karl,130 +Liam,140 +Mallory,150 +Niaj,160 +Olivia,170 +Peggy,180 +Quinn,190 +Rupert,200 +Sybil,210 +Trent,220 +Victor,not_a_number +""" + sample18 = """\ +name,age +Alice,30 +Bob,40 +Carol,50 +Dave,60 +Eve,70 +Frank,80 +Grace,90 +Heidi,100 +Ivan,110 +Judy,120 +Karl,130 +Liam,140 +Mallory,150 +Niaj,160 +Olivia,170 +Peggy,180 +Quinn,190 +Rupert,200 +Sybil,210 +Trent,220 +Uma,230 +Victor,not_a_number +""" + def test_issue43625(self): sniffer = csv.Sniffer() self.assertTrue(sniffer.has_header(self.sample12)) @@ -1385,6 +1435,11 @@ def test_has_header_regex_special_delimiter(self): self.assertIs(sniffer.has_header(self.sample8), False) self.assertIs(sniffer.has_header(self.header2 + self.sample8), True) + def test_has_header_checks_20_rows(self): + sniffer = csv.Sniffer() + self.assertIs(sniffer.has_header(self.sample17), False) + self.assertIs(sniffer.has_header(self.sample18), True) + def test_guess_quote_and_delimiter(self): sniffer = csv.Sniffer() for header in (";'123;4';", "'123;4';", ";'123;4'", "'123;4'"): From e44e9dff848600b8d9df50daab1edb5c891ea192 Mon Sep 17 00:00:00 2001 From: Jan-Eric Nitschke <47750513+JanEricNitschke@users.noreply.github.com> Date: Mon, 8 Sep 2025 11:59:49 +0200 Subject: [PATCH 4/5] Replace name and age with letter and offset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/test/test_csv.py | 57 +++++++------------------------------------- 1 file changed, 8 insertions(+), 49 deletions(-) diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 9f8913e1edd7d5..a968213c23fedf 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -1364,55 +1364,14 @@ class TestSniffer(unittest.TestCase): sample15 = "\n\n\n" sample16 = "abc\ndef\nghi" - sample17 = """\ -name,age -Alice,30 -Bob,40 -Carol,50 -Dave,60 -Eve,70 -Frank,80 -Grace,90 -Heidi,100 -Ivan,110 -Judy,120 -Karl,130 -Liam,140 -Mallory,150 -Niaj,160 -Olivia,170 -Peggy,180 -Quinn,190 -Rupert,200 -Sybil,210 -Trent,220 -Victor,not_a_number -""" - sample18 = """\ -name,age -Alice,30 -Bob,40 -Carol,50 -Dave,60 -Eve,70 -Frank,80 -Grace,90 -Heidi,100 -Ivan,110 -Judy,120 -Karl,130 -Liam,140 -Mallory,150 -Niaj,160 -Olivia,170 -Peggy,180 -Quinn,190 -Rupert,200 -Sybil,210 -Trent,220 -Uma,230 -Victor,not_a_number -""" + sample17 = ["letter,offset"] + sample17.extend(f"{chr(ord('a') + i)},{i}" for i in range(20)) + sample17.append("v,twenty_one") + sample17 = '\n'.join(sample17) + sample18 = ["letter,offset"] + sample18.extend(f"{chr(ord('a') + i)},{i}" for i in range(21)) + sample18.append("v,twenty_one") + sample18 = '\n'.join(sample18) def test_issue43625(self): sniffer = csv.Sniffer() From ccf93cfd7f17a23f0aee36ea94cdefeff19a5aeb Mon Sep 17 00:00:00 2001 From: Jan-Eric Nitschke Date: Mon, 8 Sep 2025 16:12:31 +0200 Subject: [PATCH 5/5] Address review comment --- Lib/test/test_csv.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index a968213c23fedf..98ee0c3cdd7a06 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -922,6 +922,7 @@ def test_dict_reader_set_fieldnames(self): fieldnames = ["a", "b", "c"] f = StringIO() reader = csv.DictReader(f) + self.assertIsNone(reader.fieldnames) reader.fieldnames = fieldnames self.assertEqual(reader.fieldnames, fieldnames) @@ -940,7 +941,7 @@ def test_dict_writer_fieldnames_accepts_list(self): def test_dict_reader_fieldnames_is_optional(self): f = StringIO() reader = csv.DictReader(f, fieldnames=None) - self.assertEqual(reader.fieldnames, None) + self.assertIsNone(reader.fieldnames) def test_read_dict_fields(self): with TemporaryFile("w+", encoding="utf-8") as fileobj: @@ -1366,11 +1367,12 @@ class TestSniffer(unittest.TestCase): sample17 = ["letter,offset"] sample17.extend(f"{chr(ord('a') + i)},{i}" for i in range(20)) - sample17.append("v,twenty_one") + sample17.append("v,twenty_one") # 'u' was skipped sample17 = '\n'.join(sample17) + sample18 = ["letter,offset"] sample18.extend(f"{chr(ord('a') + i)},{i}" for i in range(21)) - sample18.append("v,twenty_one") + sample18.append("v,twenty_one") # 'u' was not skipped sample18 = '\n'.join(sample18) def test_issue43625(self): @@ -1396,8 +1398,8 @@ def test_has_header_regex_special_delimiter(self): def test_has_header_checks_20_rows(self): sniffer = csv.Sniffer() - self.assertIs(sniffer.has_header(self.sample17), False) - self.assertIs(sniffer.has_header(self.sample18), True) + self.assertFalse(sniffer.has_header(self.sample17)) + self.assertTrue(sniffer.has_header(self.sample18)) def test_guess_quote_and_delimiter(self): sniffer = csv.Sniffer() @@ -1453,7 +1455,6 @@ def test_delimiters(self): self.assertRaisesRegex(csv.Error, "Could not determine delimiter", sniffer.sniff, self.sample16) - def test_doublequote(self): sniffer = csv.Sniffer() dialect = sniffer.sniff(self.header1)