Permalink
Browse files

Intelligently handle currency symbols in int and float columns. Closes

  • Loading branch information...
1 parent a88a5f8 commit 60640bb95ac255d53bee635e20af1e9a493ed94f @onyxfish onyxfish committed Apr 13, 2012
View
@@ -18,6 +18,6 @@
from panda.tests.test_solr import TestSolrJSONEncoder
from panda.tests.test_related_upload import TestRelatedUpload
from panda.tests.test_user import TestUser
-from panda.tests.test_utils import TestCSV, TestXLS, TestXLSX
+from panda.tests.test_utils import TestCSV, TestXLS, TestXLSX, TestTypeCoercion
from panda.tests.test_views import TestLogin, TestActivate
@@ -575,3 +575,26 @@ def test_generate_typed_column_names_conflict(self):
self.assertEqual([c['indexed_name'] for c in self.dataset.column_schema], ['column_int_test', None, 'column_unicode_test', 'column_unicode_test2'])
+ def test_reindex_with_currency(self):  # Reindexing a '$'-prefixed price column as float must yield numeric, searchable values.
+ upload = utils.get_test_data_upload(self.user, self.dataset, filename=utils.TEST_MONEY)  # fixture with 'product' and 'price' columns
+ self.dataset.import_data(self.user, upload)
+
+ # Reload the dataset from the database before reindexing
+ dataset = Dataset.objects.get(id=self.dataset.id)
+
+ dataset.reindex_data(self.user, typed_columns=[False, True], column_types=['unicode', 'float'])  # only 'price' is typed, as float
+
+ # Reload again so the assertions below see the schema as stored after the reindex
+ dataset = Dataset.objects.get(id=self.dataset.id)
+
+ self.assertEqual([c['name'] for c in dataset.column_schema], ['product', 'price'])
+ self.assertEqual([c['type'] for c in dataset.column_schema], ['unicode', 'float'])
+ self.assertEqual([c['indexed'] for c in dataset.column_schema], [False, True])
+ self.assertEqual([c['indexed_name'] for c in dataset.column_schema], [None, 'column_float_price'])
+ self.assertEqual([c['min'] for c in dataset.column_schema], [None, 39.99])  # cheapest fixture price is $39.99
+ self.assertEqual([c['max'] for c in dataset.column_schema], [None, 2599.00])  # most expensive fixture price is $2599
+
+ self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_float_price:39.99')['response']['numFound'], 2)  # two fixture rows cost $39.99
+ self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_float_price:[1500 TO *]')['response']['numFound'], 2)  # $1799 and $2599
+ self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_float_price:*')['response']['numFound'], 8)  # all eight fixture rows have a price
+
View
@@ -1,10 +1,12 @@
#!/usr/bin/env python
+from datetime import date, time, datetime
import os.path
from django.test import TestCase
from panda import utils
+from panda.exceptions import TypeCoercionError
from panda.tests import utils as test_utils
class TestCSV(TestCase):
@@ -114,3 +116,62 @@ def test_xlsx_guess_column_types(self):
self.assertEqual(guessed_types, ['unicode', 'date', 'int', 'bool', 'float', 'time', 'datetime', None, 'unicode'])
+class TestTypeCoercion(TestCase):
+ def setUp(self):
+ self.data_typer = utils.typecoercion.DataTyper([])
+ self.coerce_type = self.data_typer.coerce_type
+
+ def test_coerce_nulls(self):
+ self.assertEqual(self.coerce_type(None, bool), None)
+ self.assertEqual(self.coerce_type('N/A', int), None)
+ self.assertEqual(self.coerce_type('n/a', datetime), None)
+
+ def test_coerce_int_from_str(self):
+ self.assertEqual(self.coerce_type('171', int), 171)
+
+ def test_coerce_int_from_str_fails(self):
+ with self.assertRaises(TypeCoercionError):
+ self.assertEqual(self.coerce_type('#171', int), 171)
+
+ def test_coerce_int_from_unicode(self):
+ self.assertEqual(self.coerce_type(u'171', int), 171)
+
+ def test_coerce_int_from_currency_str(self):
+ self.assertEqual(self.coerce_type('$171,000', int), 171000)
+
+ def test_coerce_int_from_currency_float(self):
+ self.assertEqual(self.coerce_type(u'$171,000', int), 171000)
+
+ def test_coerce_float_from_str(self):
+ self.assertEqual(self.coerce_type('171.59', float), 171.59)
+
+ def test_coerce_float_from_unicode(self):
+ self.assertEqual(self.coerce_type(u'171.59', float), 171.59)
+
+ def test_coerce_float_from_currency_str(self):
+ self.assertEqual(self.coerce_type('$171,000.59', float), 171000.59)
+
+ def test_coerce_float_from_currency_float(self):
+ self.assertEqual(self.coerce_type(u'$171,000.59', float), 171000.59)
+
+ def test_coerce_bool_from_str(self):
+ self.assertEqual(self.coerce_type('True', bool), True)
+ self.assertEqual(self.coerce_type('true', bool), True)
+ self.assertEqual(self.coerce_type('T', bool), True)
+ self.assertEqual(self.coerce_type('yes', bool), True)
+
+ def test_coerce_bool_from_unicode(self):
+ self.assertEqual(self.coerce_type(u'True', bool), True)
+ self.assertEqual(self.coerce_type(u'true', bool), True)
+ self.assertEqual(self.coerce_type(u'T', bool), True)
+ self.assertEqual(self.coerce_type(u'yes', bool), True)
+
+ def test_coerce_datetime_from_str(self):
+ self.assertEqual(self.coerce_type('2011-4-13 8:28 AM', datetime), datetime(2011, 4, 13, 8, 28, 0))
+
+ def test_coerce_date_from_str(self):
+ self.assertEqual(self.coerce_type('2011-4-13', date), datetime(2011, 4, 13, 0, 0, 0))
+
+ def test_coerce_time_from_str(self):
+ self.assertEqual(self.coerce_type('8:28 AM', time), datetime(9999, 12, 31, 8, 28, 0))
+
View
@@ -19,6 +19,7 @@
TEST_EXCEL_XLSX_FILENAME = 'contributors.excel.xlsx'
TEST_OO_XLSX_FILENAME = 'contributors.oo.xlsx'
TEST_LATIN1_FILENAME = 'test_not_unicode_sample.csv'
+TEST_MONEY = 'test_money.csv'
def setup_test_solr():
settings.SOLR_DATA_CORE = 'data_test'
@@ -1,4 +1,5 @@
#!/usr/bin/env python
+# -*- coding: utf-8 -*-
from datetime import date, time, datetime
@@ -17,6 +18,9 @@
'time': time
}
+CURRENCY_SYMBOLS_ASCII = '$,'
+CURRENCY_SYMBOLS_UNICODE_TRANSLATE_TABLE = dict([(ord(c), None) for c in '$,€£₱'])
+
class DataTyper(object):
"""
A callable object that adds typed columns to a Solr object based on a Dataset schema.
@@ -98,6 +102,12 @@ def coerce_type(self, value, normal_type):
return unicode(value)
# int
elif normal_type is int:
+ # Filter currency symbols
+ if isinstance(value, str):
+ value = value.translate(None, CURRENCY_SYMBOLS_ASCII)
+ elif isinstance(value, unicode):
+ value = value.translate(CURRENCY_SYMBOLS_UNICODE_TRANSLATE_TABLE)
+
return int(value)
# bool
elif normal_type is bool:
@@ -114,6 +124,12 @@ def coerce_type(self, value, normal_type):
return bool(value)
# float
elif normal_type is float:
+ # Filter currency symbols
+ if isinstance(value, str):
+ value = value.translate(None, CURRENCY_SYMBOLS_ASCII)
+ elif isinstance(value, unicode):
+ value = value.translate(CURRENCY_SYMBOLS_UNICODE_TRANSLATE_TABLE)
+
return float(value)
# date, time, datetime
elif normal_type in [date, time, datetime]:
View
@@ -0,0 +1,9 @@
+product,price
+MacBook Air 11-inch,$999
+MacBook Air 13-inch,$1299
+MacBook Pro 13-inch,$1199
+MacBook Pro 15-inch,$1799
+MacBook Pro 17-inch,$2599
+iPhone 4S,$199
+AT&T Service,$39.99
+Verizon Service,$39.99

0 comments on commit 60640bb

Please sign in to comment.