diff --git a/tests/translate/storage/test_csvl10n.py b/tests/translate/storage/test_csvl10n.py index 3f07a7a8f..eb06b70e1 100644 --- a/tests/translate/storage/test_csvl10n.py +++ b/tests/translate/storage/test_csvl10n.py @@ -38,6 +38,32 @@ def test_utf_8(self): assert store.units[0].source == "test" assert store.units[0].target == "zkouška sirén" + def test_dialect(self): + payload = '"location","source","target"\r\n"foo.c:1","test","zkouška sirén"\r\n'.encode() + store = self.StoreClass() + store.parse(payload) + assert len(store.units) == 1 + assert store.units[0].source == "test" + assert store.units[0].target == "zkouška sirén" + + store = self.StoreClass() + store.parse(payload, dialect="excel") + assert len(store.units) == 1 + assert store.units[0].source == "test" + assert store.units[0].target == "zkouška sirén" + + store = self.StoreClass() + store.parse(payload, dialect="unix") + assert len(store.units) == 1 + assert store.units[0].source == "test" + assert store.units[0].target == "zkouška sirén" + + store = self.StoreClass() + store.parse(payload, dialect="default") + assert len(store.units) == 1 + assert store.units[0].source == "test" + assert store.units[0].target == "zkouška sirén" + def test_utf_8_sig(self): content = '"location";"source";"target"\r\n"foo.c:1";"test";"zkouška sirén"\r\n'.encode( "utf-8-sig" diff --git a/translate/storage/csvl10n.py b/translate/storage/csvl10n.py index c8e48626b..af357f78f 100644 --- a/translate/storage/csvl10n.py +++ b/translate/storage/csvl10n.py @@ -21,6 +21,8 @@ or entire files (csvfile) for use with localisation. """ +from __future__ import annotations + import csv from translate.storage import base @@ -291,7 +293,9 @@ def __init__(self, inputfile=None, fieldnames=None, encoding="auto"): inputfile.close() self.parse(csvsrc) - def parse(self, csvsrc, sample_length=1024): + def parse( + self, csvsrc, sample_length: int | None = 1024, *, dialect: None | str = None + ): if self._encoding == "auto": text, encoding = self.detect_encoding( csvsrc, default_encodings=["utf-8", "utf-16"] @@ -305,14 +309,17 @@ def parse(self, csvsrc, sample_length=1024): sniffer = csv.Sniffer() sample = text[:sample_length] if sample_length else text - try: - self.dialect = sniffer.sniff(sample) - if self.dialect.quoting == csv.QUOTE_MINIMAL: - # HACKISH: most probably a default, not real detection - self.dialect.quoting = csv.QUOTE_ALL - self.dialect.doublequote = True - except csv.Error: - self.dialect = "default" + if dialect is not None: + self.dialect = dialect + else: + try: + self.dialect = sniffer.sniff(sample) + if self.dialect.quoting == csv.QUOTE_MINIMAL: + # HACKISH: most probably a default, not real detection + self.dialect.quoting = csv.QUOTE_ALL + self.dialect.doublequote = True + except csv.Error: + self.dialect = "default" inputfile = csv.StringIO(text) try: