Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Comparing changes

Choose two branches to see what's changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
...
Checking mergeability… Don't worry, you can still create the pull request.
  • 2 commits
  • 6 files changed
  • 0 commit comments
  • 1 contributor
View
2  panda/tests/__init__.py
@@ -18,6 +18,6 @@
from panda.tests.test_solr import TestSolrJSONEncoder
from panda.tests.test_related_upload import TestRelatedUpload
from panda.tests.test_user import TestUser
-from panda.tests.test_utils import TestCSV, TestXLS, TestXLSX
+from panda.tests.test_utils import TestCSV, TestXLS, TestXLSX, TestTypeCoercion
from panda.tests.test_views import TestLogin, TestActivate
View
23 panda/tests/test_dataset.py
@@ -575,3 +575,26 @@ def test_generate_typed_column_names_conflict(self):
self.assertEqual([c['indexed_name'] for c in self.dataset.column_schema], ['column_int_test', None, 'column_unicode_test', 'column_unicode_test2'])
+ def test_reindex_with_currency(self):
+ upload = utils.get_test_data_upload(self.user, self.dataset, filename=utils.TEST_MONEY)
+ self.dataset.import_data(self.user, upload)
+
+ # Refresh from database
+ dataset = Dataset.objects.get(id=self.dataset.id)
+
+ dataset.reindex_data(self.user, typed_columns=[False, True], column_types=['unicode', 'float'])
+
+ # Refresh from database
+ dataset = Dataset.objects.get(id=self.dataset.id)
+
+ self.assertEqual([c['name'] for c in dataset.column_schema], ['product', 'price'])
+ self.assertEqual([c['type'] for c in dataset.column_schema], ['unicode', 'float'])
+ self.assertEqual([c['indexed'] for c in dataset.column_schema], [False, True])
+ self.assertEqual([c['indexed_name'] for c in dataset.column_schema], [None, 'column_float_price'])
+ self.assertEqual([c['min'] for c in dataset.column_schema], [None, 39.99])
+ self.assertEqual([c['max'] for c in dataset.column_schema], [None, 2599.00])
+
+ self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_float_price:39.99')['response']['numFound'], 2)
+ self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_float_price:[1500 TO *]')['response']['numFound'], 2)
+ self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'column_float_price:*')['response']['numFound'], 8)
+
View
61 panda/tests/test_utils.py
@@ -1,10 +1,12 @@
#!/usr/bin/env python
+from datetime import date, time, datetime
import os.path
from django.test import TestCase
from panda import utils
+from panda.exceptions import TypeCoercionError
from panda.tests import utils as test_utils
class TestCSV(TestCase):
@@ -114,3 +116,62 @@ def test_xlsx_guess_column_types(self):
self.assertEqual(guessed_types, ['unicode', 'date', 'int', 'bool', 'float', 'time', 'datetime', None, 'unicode'])
+class TestTypeCoercion(TestCase):
+ def setUp(self):
+ self.data_typer = utils.typecoercion.DataTyper([])
+ self.coerce_type = self.data_typer.coerce_type
+
+ def test_coerce_nulls(self):
+ self.assertEqual(self.coerce_type(None, bool), None)
+ self.assertEqual(self.coerce_type('N/A', int), None)
+ self.assertEqual(self.coerce_type('n/a', datetime), None)
+
+ def test_coerce_int_from_str(self):
+ self.assertEqual(self.coerce_type('171', int), 171)
+
+ def test_coerce_int_from_str_fails(self):
+ with self.assertRaises(TypeCoercionError):
+ self.assertEqual(self.coerce_type('#171', int), 171)
+
+ def test_coerce_int_from_unicode(self):
+ self.assertEqual(self.coerce_type(u'171', int), 171)
+
+ def test_coerce_int_from_currency_str(self):
+ self.assertEqual(self.coerce_type('$171,000', int), 171000)
+
+ def test_coerce_int_from_currency_float(self):
+ self.assertEqual(self.coerce_type(u'$171,000', int), 171000)
+
+ def test_coerce_float_from_str(self):
+ self.assertEqual(self.coerce_type('171.59', float), 171.59)
+
+ def test_coerce_float_from_unicode(self):
+ self.assertEqual(self.coerce_type(u'171.59', float), 171.59)
+
+ def test_coerce_float_from_currency_str(self):
+ self.assertEqual(self.coerce_type('$171,000.59', float), 171000.59)
+
+ def test_coerce_float_from_currency_float(self):
+ self.assertEqual(self.coerce_type(u'$171,000.59', float), 171000.59)
+
+ def test_coerce_bool_from_str(self):
+ self.assertEqual(self.coerce_type('True', bool), True)
+ self.assertEqual(self.coerce_type('true', bool), True)
+ self.assertEqual(self.coerce_type('T', bool), True)
+ self.assertEqual(self.coerce_type('yes', bool), True)
+
+ def test_coerce_bool_from_unicode(self):
+ self.assertEqual(self.coerce_type(u'True', bool), True)
+ self.assertEqual(self.coerce_type(u'true', bool), True)
+ self.assertEqual(self.coerce_type(u'T', bool), True)
+ self.assertEqual(self.coerce_type(u'yes', bool), True)
+
+ def test_coerce_datetime_from_str(self):
+ self.assertEqual(self.coerce_type('2011-4-13 8:28 AM', datetime), datetime(2011, 4, 13, 8, 28, 0))
+
+ def test_coerce_date_from_str(self):
+ self.assertEqual(self.coerce_type('2011-4-13', date), datetime(2011, 4, 13, 0, 0, 0))
+
+ def test_coerce_time_from_str(self):
+ self.assertEqual(self.coerce_type('8:28 AM', time), datetime(9999, 12, 31, 8, 28, 0))
+
View
1  panda/tests/utils.py
@@ -19,6 +19,7 @@
TEST_EXCEL_XLSX_FILENAME = 'contributors.excel.xlsx'
TEST_OO_XLSX_FILENAME = 'contributors.oo.xlsx'
TEST_LATIN1_FILENAME = 'test_not_unicode_sample.csv'
+TEST_MONEY = 'test_money.csv'
def setup_test_solr():
settings.SOLR_DATA_CORE = 'data_test'
View
18 panda/utils/typecoercion.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python
+# -*- coding: utf-8 -*-
from datetime import date, time, datetime
@@ -17,6 +18,11 @@
'time': time
}
+CURRENCY_SYMBOLS_ASCII = '$,'
+
+# Via http://en.wikipedia.org/wiki/Currency_sign
+CURRENCY_SYMBOLS_UNICODE_TRANSLATE_TABLE = dict([(ord(c), None) for c in '$,€£₱؋฿₵₡₫ƒ₣₲₴₭ლ₥₦£៛₹₪৳₮₩¥'])
+
class DataTyper(object):
"""
A callable object that adds typed columns to a Solr object based on a Dataset schema.
@@ -98,6 +104,12 @@ def coerce_type(self, value, normal_type):
return unicode(value)
# int
elif normal_type is int:
+ # Filter currency symbols and comma thousands separators (e.g. '$171,000' -> '171000')
+ if isinstance(value, str):
+ value = value.translate(None, CURRENCY_SYMBOLS_ASCII)
+ elif isinstance(value, unicode):
+ value = value.translate(CURRENCY_SYMBOLS_UNICODE_TRANSLATE_TABLE)
+
return int(value)
# bool
elif normal_type is bool:
@@ -114,6 +126,12 @@ def coerce_type(self, value, normal_type):
return bool(value)
# float
elif normal_type is float:
+ # Filter currency symbols and comma thousands separators (e.g. '$171,000.59' -> '171000.59')
+ if isinstance(value, str):
+ value = value.translate(None, CURRENCY_SYMBOLS_ASCII)
+ elif isinstance(value, unicode):
+ value = value.translate(CURRENCY_SYMBOLS_UNICODE_TRANSLATE_TABLE)
+
return float(value)
# date, time, datetime
elif normal_type in [date, time, datetime]:
View
9 test_data/test_money.csv
@@ -0,0 +1,9 @@
+product,price
+MacBook Air 11-inch,$999
+MacBook Air 13-inch,$1299
+MacBook Pro 13-inch,$1199
+MacBook Pro 15-inch,$1799
+MacBook Pro 17-inch,$2599
+iPhone 4S,$199
+AT&T Service,$39.99
+Verizon Service,$39.99

No commit comments for this range

Something went wrong with that request. Please try again.