Skip to content

Commit

Permalink
Escaping special symbols now works correctly when dumping; typo on RE…
Browse files Browse the repository at this point in the history
…ADME corrected; bad assertion removed from test_loads_dumps
  • Loading branch information
renatopp committed Mar 1, 2014
1 parent 8913f58 commit 4984a4f
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 5 deletions.
2 changes: 1 addition & 1 deletion README.rst
@@ -1,5 +1,5 @@
=========
LIAF-ARFF
LIAC-ARFF
=========

The liac-arff module implements functions to read and write ARFF files in
Expand Down
23 changes: 21 additions & 2 deletions arff.py
Expand Up @@ -160,6 +160,18 @@
# Matches a whole string that is a double-quoted token, a single-quoted token,
# or a run of non-whitespace characters (captured in group 1).
_RE_RELATION = re.compile(r'^(\".*\"|\'.*\'|\S*)$', re.UNICODE)
# Same kind of token (group 1), then whitespace, then the non-empty remainder
# of the string (group 2).
_RE_ATTRIBUTE = re.compile(r'^(\".*\"|\'.*\'|\S*)\s+(.+)$', re.UNICODE)
# A brace-enclosed, comma-separated list of such tokens.
_RE_TYPE_NOMINAL = re.compile(r'^\{\s*((\".*\"|\'.*\'|\S*)\s*,\s*)*(\".*\"|\'.*\'|\S*)}$', re.UNICODE)
# Matches either an already-escaped sequence (\', \" or \%) or a single
# backslash, double quote, single quote or percent sign. The escaped
# alternatives are listed first, so a backslash followed by one of those
# characters is consumed as one match instead of two.
_RE_ESCAPE = re.compile(r'\\\'|\\\"|\\\%|[\\"\'%]')

# Replacement text for each string _RE_ESCAPE can match, keyed by the matched
# text. The keys are also scanned by _encode_data to decide whether a value
# has to be passed through encode_string at all.
_ESCAPE_DCT = {
# _RE_ESCAPE never matches a space, so this entry is not used as a
# replacement; it only makes values containing a space get quoted.
' ': ' ',
# Bare quotes and percent signs gain a leading backslash.
"'": "\\'",
'"': '\\"',
'%': '\\%',
# A lone backslash is emitted unchanged (it is not doubled).
'\\': '\\',
# Sequences that are already escaped map to themselves, so encoding an
# already-encoded string does not add further backslashes.
'\\\'': '\\\'',
'\\"': '\\"',
'\\%': '\\%',
}
# =============================================================================

# COMPATIBILITY WITH PYTHON 3.3 ===============================================
Expand Down Expand Up @@ -232,6 +244,11 @@ def __str__(self):
# =============================================================================

# INTERNAL ====================================================================
def encode_string(s):
    '''Return ``s`` wrapped in single quotes with its special symbols escaped.

    Every substring matched by ``_RE_ESCAPE`` is replaced by the entry stored
    for it in ``_ESCAPE_DCT``: bare quotes and percent signs receive a leading
    backslash, while lone backslashes and sequences that are already escaped
    are copied through unchanged.

    :param s: the text to encode.
    :return: the escaped text surrounded by single quotes.
    '''
    def replace(match):
        # The matched text is itself the lookup key for its replacement.
        return _ESCAPE_DCT[match.group(0)]

    return u"'" + _RE_ESCAPE.sub(replace, s) + u"'"

class Conversor(object):
'''Conversor is a helper used for converting ARFF types to Python types.'''

Expand Down Expand Up @@ -592,8 +609,10 @@ def _encode_data(self, data):
new_data = []
for v in data:
s = unicode(v)
if u' ' in s:
s = u'"%s"'%s
for escape_char in _ESCAPE_DCT:
if escape_char in s:
s = encode_string(s)
break
new_data.append(s)

return u','.join(new_data)
Expand Down
54 changes: 54 additions & 0 deletions tests/test_dump_escape.py
@@ -0,0 +1,54 @@
import unittest
import arff

# Input document parsed with arff.loads in test_encode_source. The literal is
# not a raw string, so the \' pairs in the last data row are plain single
# quotes at runtime.
ARFF_SOURCE = '''%
@RELATION teste
@ATTRIBUTE a STRING
@ATTRIBUTE b {a, b, c, d}
@ATTRIBUTE c STRING
@DATA
lorem, b, thisisavalidstatement
lorem, b, 'this is a valid statement with an % symbol'
lorem2, d, 'this is a valid statement'
lorem3, c, 'this is a valid statement with double quotes included """""""! '
lorem4, a, 'this is a valid statement with singlequotes included \' lol \'! '
'''

# Text that arff.dumps is expected to produce; the tests also feed it back
# through loads/dumps and expect it to come out unchanged. The literal is not
# a raw string, so every \\ below is a single backslash at runtime.
ARFF_DESTINY = '''@RELATION teste
@ATTRIBUTE a STRING
@ATTRIBUTE b {a, b, c, d}
@ATTRIBUTE c STRING
@DATA
lorem,b,thisisavalidstatement
lorem,b,'this is a valid statement with an \\% symbol'
lorem2,d,'this is a valid statement'
lorem3,c,'this is a valid statement with double quotes included \\"\\"\\"\\"\\"\\"\\"! '
lorem4,a,'this is a valid statement with singlequotes included \\\' lol \\\'! '
%
%
% '''


class TestDumpEscape(unittest.TestCase):
    # Exercises the escaping applied by arff.dumps to quotes, percent signs
    # and spaces inside data values.

    def test_encode_source(self):
        # Loading the unescaped source and dumping it again must yield the
        # escaped reference text.
        obj = arff.loads(ARFF_SOURCE)
        result = arff.dumps(obj)
        expected = ARFF_DESTINY

        self.assertEqual(result, expected)

    def test_encode_destiny(self):
        # Repeated load/dump round trips of already-escaped text must not
        # change it (e.g. by stacking extra backslashes on each pass).
        src = ARFF_DESTINY

        for _ in range(10):
            obj = arff.loads(src)
            src = arff.dumps(obj)
            self.assertEqual(src, ARFF_DESTINY)

2 changes: 1 addition & 1 deletion tests/test_encode_data.py
Expand Up @@ -11,7 +11,7 @@ def test_simple(self):

fixture = [1, 3, 'Renato', 'Name with spaces']
result = encoder._encode_data(fixture)
expected = u'1,3,Renato,"Name with spaces"'
expected = u"1,3,Renato,'Name with spaces'"

self.assertEqual(result, expected)

1 change: 0 additions & 1 deletion tests/test_loads_dumps.py
Expand Up @@ -61,4 +61,3 @@ def test_simple(self):
arff = dumps(obj)
self.assertEqual(arff, ARFF)

assert False

0 comments on commit 4984a4f

Please sign in to comment.