Permalink
Browse files

Escaping special symbols now working correctly when dumping; typo on RE…

…ADME corrected; bad assertion removed from test_loads_dumps
  • Loading branch information...
1 parent 8913f58 commit 4984a4f7905db2039cb25420e6de5e70703fa9e6 @renatopp committed Mar 1, 2014
Showing with 77 additions and 5 deletions.
  1. +1 −1 README.rst
  2. +21 −2 arff.py
  3. +54 −0 tests/test_dump_escape.py
  4. +1 −1 tests/test_encode_data.py
  5. +0 −1 tests/test_loads_dumps.py
View
@@ -1,5 +1,5 @@
=========
-LIAF-ARFF
+LIAC-ARFF
=========
The liac-arff module implements functions to read and write ARFF files in
View
23 arff.py
@@ -160,6 +160,18 @@
_RE_RELATION = re.compile(r'^(\".*\"|\'.*\'|\S*)$', re.UNICODE)
_RE_ATTRIBUTE = re.compile(r'^(\".*\"|\'.*\'|\S*)\s+(.+)$', re.UNICODE)
_RE_TYPE_NOMINAL = re.compile(r'^\{\s*((\".*\"|\'.*\'|\S*)\s*,\s*)*(\".*\"|\'.*\'|\S*)}$', re.UNICODE)
+_RE_ESCAPE = re.compile(r'\\\'|\\\"|\\\%|[\\"\'%]')
+
+_ESCAPE_DCT = {
+ ' ': ' ',
+ "'": "\\'",
+ '"': '\\"',
+ '%': '\\%',
+ '\\': '\\',
+ '\\\'': '\\\'',
+ '\\"': '\\"',
+ '\\%': '\\%',
+}
# =============================================================================
# COMPATIBILITY WITH PYTHON 3.3 ===============================================
@@ -232,6 +244,11 @@ def __str__(self):
# =============================================================================
# INTERNAL ====================================================================
+def encode_string(s):
+ def replace(match):
+ return _ESCAPE_DCT[match.group(0)]
+ return u"'" + _RE_ESCAPE.sub(replace, s) + u"'"
+
class Conversor(object):
'''Conversor is a helper used for converting ARFF types to Python types.'''
@@ -592,8 +609,10 @@ def _encode_data(self, data):
new_data = []
for v in data:
s = unicode(v)
- if u' ' in s:
- s = u'"%s"'%s
+ for escape_char in _ESCAPE_DCT:
+ if escape_char in s:
+ s = encode_string(s)
+ break
new_data.append(s)
return u','.join(new_data)
View
@@ -0,0 +1,54 @@
+import unittest
+import arff
+
+ARFF_SOURCE = '''%
+@RELATION teste
+
+@ATTRIBUTE a STRING
+@ATTRIBUTE b {a, b, c, d}
+@ATTRIBUTE c STRING
+
+@DATA
+lorem, b, thisisavalidstatement
+lorem, b, 'this is a valid statement with an % symbol'
+lorem2, d, 'this is a valid statement'
+lorem3, c, 'this is a valid statement with double quotes included """""""! '
+lorem4, a, 'this is a valid statement with singlequotes included \' lol \'! '
+'''
+
+ARFF_DESTINY = '''@RELATION teste
+
+@ATTRIBUTE a STRING
+@ATTRIBUTE b {a, b, c, d}
+@ATTRIBUTE c STRING
+
+@DATA
+lorem,b,thisisavalidstatement
+lorem,b,'this is a valid statement with an \\% symbol'
+lorem2,d,'this is a valid statement'
+lorem3,c,'this is a valid statement with double quotes included \\"\\"\\"\\"\\"\\"\\"! '
+lorem4,a,'this is a valid statement with singlequotes included \\\' lol \\\'! '
+%
+%
+% '''
+
+
+class TestDumpEscape(unittest.TestCase):
+ def test_encode_source(self):
+ obj = arff.loads(ARFF_SOURCE)
+ result = arff.dumps(obj)
+ expected = ARFF_DESTINY
+
+ self.assertEqual(result, expected)
+
+ def test_encode_destiny(self):
+ src = ARFF_DESTINY
+
+ count = 0
+ while count < 10:
+ count += 1
+
+ obj = arff.loads(src)
+ src = arff.dumps(obj)
+ self.assertEqual(src, ARFF_DESTINY)
+
@@ -11,7 +11,7 @@ def test_simple(self):
fixture = [1, 3, 'Renato', 'Name with spaces']
result = encoder._encode_data(fixture)
- expected = u'1,3,Renato,"Name with spaces"'
+ expected = u"1,3,Renato,'Name with spaces'"
self.assertEqual(result, expected)
@@ -61,4 +61,3 @@ def test_simple(self):
arff = dumps(obj)
self.assertEqual(arff, ARFF)
- assert False

0 comments on commit 4984a4f

Please sign in to comment.