Skip to content

Commit

Permalink
Add: encoder checks if a line has all attributes
Browse files (browse the repository at this point in the history)
  • Loading branch information
mfeurer committed Jul 14, 2015
1 parent 3c38d29 commit 630b790
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 11 deletions.
14 changes: 10 additions & 4 deletions arff.py
Expand Up @@ -625,17 +625,22 @@ def _encode_attribute(self, name, type_):

return u'%s %s %s'%(_TK_ATTRIBUTE, name, type_)

def _encode_data(self, data):
def _encode_data(self, data, attributes):
'''(INTERNAL) Encodes a line of data.
Data instances follow the csv format, i.e, attribute values are
Data instances follow the csv format, i.e, attribute values are
delimited by commas. After converted from csv.
:param data: a list of values.
:param attributes: a list of attributes. Used to check if data is valid.
:return: a string with the encoded data line.
'''

if len(data) != len(attributes):
raise BadObject()

new_data = []
for v in data:
for v, attr in zip(data, attributes):
if v is None or v == u'':
s = '?'
else:
Expand Down Expand Up @@ -701,12 +706,13 @@ def iter_encode(self, obj):

yield self._encode_attribute(attr[0], attr[1])
yield u''
attributes = obj['attributes']

# DATA
yield _TK_DATA
if obj.get('data'):
for inst in obj['data']:
yield self._encode_data(inst)
yield self._encode_data(inst, attributes)

# FILLER
yield self._encode_comment()
Expand Down
23 changes: 16 additions & 7 deletions tests/test_encode_data.py
@@ -1,26 +1,35 @@
import unittest
import arff


class TestEncodeData(unittest.TestCase):
def get_encoder(self):
decoder = arff.ArffEncoder()
return decoder
def setUp(self):
self.attributes = [('a1', 'INTEGER'), ('a2', 'INTEGER'),
('a3', 'STRING'), ('a4', 'STRING')]
self.encoder = arff.ArffEncoder()


def test_simple(self):
encoder = self.get_encoder()
encoder = self.encoder

fixture = [1, 3, 'Renato', 'Name with spaces']
result = encoder._encode_data(fixture)
result = encoder._encode_data(fixture, self.attributes)
expected = u"1,3,Renato,'Name with spaces'"

self.assertEqual(result, expected)

def test_null_value(self):
encoder = self.get_encoder()
encoder = self.encoder

fixture = [1, None, 'Renato', '']
result = encoder._encode_data(fixture)
result = encoder._encode_data(fixture, self.attributes)
expected = u"1,?,Renato,?"

self.assertEqual(result, expected)

def test_too_short(self):
encoder = self.encoder

fixture = [1, None]
self.assertRaises(arff.BadObject, encoder._encode_data, fixture,
self.attributes)

0 comments on commit 630b790

Please sign in to comment.