Skip to content

Commit

Permalink
test csv file in utf-16-le, utf-32-le, utf-16-be and utf-32-be with m…
Browse files Browse the repository at this point in the history
…map #33
  • Loading branch information
chfw committed May 16, 2017
1 parent b37dab1 commit 33c0e4b
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 6 deletions.
22 changes: 16 additions & 6 deletions pyexcel_io/fileformat/_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,27 @@ def __init__(self, mmap_obj, encoding):
self.__encoding = encoding
self.__count = 0
self.__endian = LITTLE_ENDIAN
if encoding.startswith('utf-8'):
if encoding == 'utf-8':
# ..\r\x00\n
# \x00\x..
self.__zeros_left_in_2_row = 0
elif encoding.startswith('utf-16'):
elif encoding == 'utf-16':
# ..\r\x00\n
# \x00\x..
self.__zeros_left_in_2_row = 1
elif encoding.startswith('utf-32'):
elif encoding == 'utf-32':
# \r\x00\x00\x00\n
# \x00\x00\x00\x..
self.__zeros_left_in_2_row = 3
elif encoding == 'utf-32-be' or encoding == 'utf-16-be':
self.__zeros_left_in_2_row = 0
self.__endian = BIG_ENDIAN
elif encoding == 'utf-32-le':
self.__zeros_left_in_2_row = 3
self.__endian = LITTLE_ENDIAN
elif encoding == 'utf-16-le':
self.__zeros_left_in_2_row = 1
self.__endian = LITTLE_ENDIAN
else:
raise Exception("Encoding %s is not supported" % encoding)

Expand All @@ -59,15 +68,16 @@ def __iter__(self):
def __next__(self):
line = self.__mmap_obj.readline()
if self.__count == 0:
utf_16_32 = (self.__encoding.startswith('utf-16') or
self.__encoding.startswith('utf-32'))
utf_16_32 = (self.__encoding == 'utf-16' or
self.__encoding == 'utf-32')
if utf_16_32:
bom_header = line[:2]
if bom_header == BOM_BIG_ENDIAN:
self.__endian = BIG_ENDIAN
elif self.__endian == LITTLE_ENDIAN:
line = line[self.__zeros_left_in_2_row:]
line = line.rstrip()
if self.__endian == LITTLE_ENDIAN:
line = line.rstrip()
line = line.decode(self.__encoding)
self.__count += 1
if line == '':
Expand Down
28 changes: 28 additions & 0 deletions tests/test_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,41 @@ def test_issue_33_34_utf32_encoded_file():
check_mmap_encoding('utf-32')


def test_issue_33_34_utf32be_encoded_file():
    """Run check_mmap_encoding for 'utf-32-be' unless PY26 is truthy."""
    # On PY26 the check is not executed at all, so the test passes vacuously.
    if not PY26:
        check_mmap_encoding('utf-32-be')


def test_issue_33_34_utf32le_encoded_file():
    """Run check_mmap_encoding for 'utf-32-le' unless PY26 is truthy."""
    # On PY26 the check is not executed at all, so the test passes vacuously.
    if not PY26:
        check_mmap_encoding('utf-32-le')


def test_issue_33_34_utf16_encoded_file():
    """Run check_mmap_encoding for 'utf-16' unless PY26 is truthy."""
    # On PY26 the check is not executed at all, so the test passes vacuously.
    if not PY26:
        check_mmap_encoding('utf-16')


def test_issue_33_34_utf16be_encoded_file():
    """Run check_mmap_encoding for 'utf-16-be' unless PY26 is truthy."""
    # On PY26 the check is not executed at all, so the test passes vacuously.
    if not PY26:
        check_mmap_encoding('utf-16-be')


def test_issue_33_34_utf16le_encoded_file():
    """Run check_mmap_encoding for 'utf-16-le' unless PY26 is truthy."""
    # On PY26 the check is not executed at all, so the test passes vacuously.
    if not PY26:
        check_mmap_encoding('utf-16-le')


def test_issue_33_34_utf8_encoded_file():
if PY26:
pass
Expand Down

0 comments on commit 33c0e4b

Please sign in to comment.