From 95c6a5e20e9b6cd7b0eb692863fcb1289068c0f4 Mon Sep 17 00:00:00 2001 From: Justin Walgran Date: Fri, 26 Apr 2019 12:21:57 -0700 Subject: [PATCH] Handle CSV files with UTF-8 BOM Some applications, when saving files with a UTF-8 encoding, will prepend a byte order mark (BOM) at the beginning of the file. By using the `utf-8-sig` encoding when reading files we will ignore the BOM if it is present. --- CHANGELOG.md | 1 + src/django/api/tests.py | 13 +++++++++++++ src/django/api/views.py | 4 ++-- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7257be854..a0aea31de 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Changed - Change facility list CSV download to request one page at a time [#496](https://github.com/open-apparel-registry/open-apparel-registry/pull/496) +- Handle CSV files that include a byte order mark [#498](https://github.com/open-apparel-registry/open-apparel-registry/pull/498) ### Deprecated diff --git a/src/django/api/tests.py b/src/django/api/tests.py index 4f57bf98e..662b43efb 100644 --- a/src/django/api/tests.py +++ b/src/django/api/tests.py @@ -49,6 +49,11 @@ def setUp(self): 'facilities.csv', b'\n'.join([s.encode() for s in self.test_csv_rows]), content_type='text/csv') + self.test_file_with_bom = SimpleUploadedFile( + 'facilities_with_bom.csv', + b'\n'.join([self.test_csv_rows[0].encode('utf-8-sig')] + + [s.encode() for s in self.test_csv_rows[1:]]), + content_type='text/csv') def post_header_only_file(self, **kwargs): if kwargs is None: @@ -60,6 +65,14 @@ def post_header_only_file(self, **kwargs): {'file': csv_file, **kwargs}, format='multipart') + def test_can_post_file_with_bom(self): + response = self.client.post(reverse('facility-list-list'), + {'file': self.test_file_with_bom}, + format='multipart') + self.assertEqual(response.status_code, status.HTTP_200_OK) + new_list = FacilityList.objects.last() + self.assertEqual(self.test_csv_rows[0], new_list.header) + def test_creates_list_and_items(self): previous_list_count = FacilityList.objects.all().count() previous_item_count = FacilityListItem.objects.all().count() diff --git a/src/django/api/views.py b/src/django/api/views.py index 17bff985e..0215273e7 100644 --- a/src/django/api/views.py +++ b/src/django/api/views.py @@ -603,7 +603,7 @@ def create(self, request): 'Uploaded file exceeds the maximum size of {:.1f}MB.'.format( mb)) try: - header = csv_file.readline().decode().rstrip() + header = csv_file.readline().decode(encoding='utf-8-sig').rstrip() except UnicodeDecodeError: ROLLBAR = getattr(settings, 'ROLLBAR', {}) if ROLLBAR: @@ -671,7 +671,7 @@ def create(self, request): items.append(FacilityListItem( row_index=(idx - 1), facility_list=new_list, - raw_data=line.decode().rstrip() + raw_data=line.decode(encoding='utf-8-sig').rstrip() )) except UnicodeDecodeError: ROLLBAR = getattr(settings, 'ROLLBAR', {})