diff --git a/arff.py b/arff.py index c8f8b34..9902035 100644 --- a/arff.py +++ b/arff.py @@ -625,17 +625,22 @@ def _encode_attribute(self, name, type_): return u'%s %s %s'%(_TK_ATTRIBUTE, name, type_) - def _encode_data(self, data): + def _encode_data(self, data, attributes): '''(INTERNAL) Encodes a line of data. - Data instances follow the csv format, i.e, attribute values are + Data instances follow the csv format, i.e, attribute values are delimited by commas. After converted from csv. :param data: a list of values. + :param attributes: a list of attributes. Used to check if data is valid. :return: a string with the encoded data line. ''' + + if len(data) != len(attributes): + raise BadObject() + new_data = [] - for v in data: + for v, attr in zip(data, attributes): if v is None or v == u'': s = '?' else: @@ -701,12 +706,13 @@ def iter_encode(self, obj): yield self._encode_attribute(attr[0], attr[1]) yield u'' + attributes = obj['attributes'] # DATA yield _TK_DATA if obj.get('data'): for inst in obj['data']: - yield self._encode_data(inst) + yield self._encode_data(inst, attributes) # FILLER yield self._encode_comment() diff --git a/tests/test_encode_data.py b/tests/test_encode_data.py index 3dcdad8..bfa4c0c 100644 --- a/tests/test_encode_data.py +++ b/tests/test_encode_data.py @@ -1,26 +1,35 @@ import unittest import arff + class TestEncodeData(unittest.TestCase): - def get_encoder(self): - decoder = arff.ArffEncoder() - return decoder + def setUp(self): + self.attributes = [('a1', 'INTEGER'), ('a2', 'INTEGER'), + ('a3', 'STRING'), ('a4', 'STRING')] + self.encoder = arff.ArffEncoder() + def test_simple(self): - encoder = self.get_encoder() + encoder = self.encoder fixture = [1, 3, 'Renato', 'Name with spaces'] - result = encoder._encode_data(fixture) + result = encoder._encode_data(fixture, self.attributes) expected = u"1,3,Renato,'Name with spaces'" self.assertEqual(result, expected) def test_null_value(self): - encoder = self.get_encoder() + encoder = self.encoder fixture = [1, None, 'Renato', ''] - result = encoder._encode_data(fixture) + result = encoder._encode_data(fixture, self.attributes) expected = u"1,?,Renato,?" self.assertEqual(result, expected) + def test_too_short(self): + encoder = self.encoder + + fixture = [1, None] + self.assertRaises(arff.BadObject, encoder._encode_data, fixture, + self.attributes) \ No newline at end of file