Permalink
Browse files

Extract 'fields_format'

  • Loading branch information...
1 parent 96f116a commit 350e2fe066c9c6cad737eec25eac69e35935c501 @jvirtanen committed Nov 7, 2012
Showing with 204 additions and 106 deletions.
  1. +2 −2 examples/yahoo-finance.c
  2. +78 −27 include/fields.h
  3. +4 −2 include/fields_posix.h
  4. +13 −16 python/fields/csv.py
  5. +23 −6 python/fields/libfields.py
  6. +2 −2 python/test_fields.py
  7. +79 −49 src/fields.c
  8. +3 −2 src/fields_posix.c
View
@@ -20,11 +20,11 @@ main(void)
struct fields_reader *reader;
struct fields_record *record;
- reader = fields_read_file(stdin, &fields_csv);
+ reader = fields_read_file(stdin, &fields_csv, &fields_defaults);
if (reader == NULL)
die("fields_read_file");
- record = fields_record_alloc(&fields_csv);
+ record = fields_record_alloc(&fields_defaults);
if (record == NULL)
die("fields_record_alloc");
View
@@ -50,28 +50,41 @@ extern "C" {
#define FIELDS_VERSION "0.5.0"
/*
- * Settings
- * --------
+ * Formats
+ * -------
*/
/*
- * Settings specify the input format and the reader and record configuration.
+ * The input format.
*/
-struct fields_settings;
+struct fields_format;
/*
- * Use comma-separated values (CSV) as the input format with the default
- * reader and record configuration. A comma (`,`) is used as the delimiter
- * and a double quote (`"`) for quoting.
+ * Comma-separated values (CSV) use a comma (`,`) as the delimiter and a
+ * double quote (`"`) for quoting.
*/
-extern const struct fields_settings fields_csv;
+extern const struct fields_format fields_csv;
/*
- * Use tab-separated values (TSV) as the input format with the default reader
- * and record configuration. A tab (`\t`) is used as the delimiter and quoting
+ * Tab-separated values (TSV) use a tab (`\t`) as the delimiter and quoting
* is disabled.
*/
-extern const struct fields_settings fields_tsv;
+extern const struct fields_format fields_tsv;
+
+/*
+ * Settings
+ * --------
+ */
+
+/*
+ * The settings for readers and records.
+ */
+struct fields_settings;
+
+/*
+ * The default settings.
+ */
+extern const struct fields_settings fields_defaults;
/*
* Fields
@@ -156,27 +169,30 @@ struct fields_reader;
/*
* Allocate a reader that reads from the specified buffer. The operation fails
- * if the settings are erroneous.
+ * if the input format or the settings are erroneous.
*
* - buffer: a buffer
* - buffer_size: size of the buffer
+ * - format: the input format
* - settings: the settings for the reader
*
* If successful, returns a reader object. Otherwise returns `NULL`.
*/
struct fields_reader *fields_read_buffer(const char *, size_t,
- const struct fields_settings *);
+ const struct fields_format *, const struct fields_settings *);
/*
* Allocate a reader that reads from the specified file. The operation fails
- * if the settings are erroneous.
+ * if the input format or the settings are erroneous.
*
* - file: a file
+ * - format: the input format
* - settings: the settings for the reader
*
* If successful, returns a reader object. Otherwise returns `NULL`.
*/
-struct fields_reader *fields_read_file(FILE *, const struct fields_settings *);
+struct fields_reader *fields_read_file(FILE *, const struct fields_format *,
+ const struct fields_settings *);
/*
* Deallocate the reader.
@@ -227,23 +243,58 @@ enum fields_reader_error
};
/*
- * Custom Settings
- * ---------------
+ * Custom Formats
+ * --------------
*/
-struct fields_settings
+struct fields_format
{
/*
* The delimiter character. Must not be `\n` or `\r`.
*/
- char delimiter;
+ char delimiter;
/*
* The quote character. Must not be `\n`, `\r` or the delimiter character.
* Set to `\0` to disable quoting.
*/
- char quote;
+ char quote;
+};
+/*
+ * Check whether the format is erroneous.
+ *
+ * - format: the format
+ *
+ * Returns an error code if the format is erroneous. Otherwise returns zero.
+ */
+int fields_format_error(const struct fields_format *);
+
+/*
+ * Get a string representation of an error code.
+ *
+ * - error: an error code
+ *
+ * Returns a string representation of the error code.
+ */
+const char *fields_format_strerror(int);
+
+/*
+ * The error codes for the format.
+ */
+enum fields_format_error
+{
+ FIELDS_FORMAT_ERROR_DELIMITER = 1,
+ FIELDS_FORMAT_ERROR_QUOTE = 2
+};
+
+/*
+ * Custom Settings
+ * ---------------
+ */
+
+struct fields_settings
+{
/*
* Expand the record if needed. If true, whenever the limit for the record
* buffer size or for the maximum number of fields in a record is reached,
@@ -301,11 +352,9 @@ const char *fields_settings_strerror(int);
*/
enum fields_settings_error
{
- FIELDS_SETTINGS_ERROR_DELIMITER = 1,
- FIELDS_SETTINGS_ERROR_QUOTE = 2,
- FIELDS_SETTINGS_ERROR_SOURCE_BUFFER_SIZE = 3,
- FIELDS_SETTINGS_ERROR_RECORD_BUFFER_SIZE = 4,
- FIELDS_SETTINGS_ERROR_RECORD_MAX_FIELDS = 5
+ FIELDS_SETTINGS_ERROR_SOURCE_BUFFER_SIZE = 1,
+ FIELDS_SETTINGS_ERROR_RECORD_BUFFER_SIZE = 2,
+ FIELDS_SETTINGS_ERROR_RECORD_MAX_FIELDS = 3
};
/*
@@ -335,17 +384,19 @@ typedef void fields_source_free_fn(void *);
/*
* Allocate a reader for the specified source. The operation fails if the
- * settings are erroneous.
+ * input format or the settings are erroneous.
*
* - source: the source object
* - read: the read method
* - free: the free method
+ * - format: the input format
* - settings: the settings for the reader
*
* If successful, returns a reader object. Otherwise returns `NULL`.
*/
struct fields_reader *fields_reader_alloc(void *, fields_source_read_fn *,
- fields_source_free_fn *, const struct fields_settings *);
+ fields_source_free_fn *, const struct fields_format *,
+ const struct fields_settings *);
#ifdef __cplusplus
}
View
@@ -40,14 +40,16 @@ extern "C" {
/*
* Allocate a reader that reads from the specified file descriptor. The
- * operation fails if the settings are erroneous.
+ * operation fails if the input format or the settings are erroneous.
*
* - fd: a file descriptor
+ * - format: the input format
* - settings: the settings for the reader
*
* If successful, returns a reader object. Otherwise returns `NULL`.
*/
-struct fields_reader *fields_read_fd(int, const struct fields_settings *);
+struct fields_reader *fields_read_fd(int, const struct fields_format *,
+ const struct fields_settings *);
#ifdef __cplusplus
}
View
@@ -33,9 +33,9 @@ def reader(source, **kwargs):
class Reader(object):
def __init__(self, source, **kwargs):
- settings = _settings(kwargs)
+ fmt, settings = _parse(kwargs)
try:
- self.__reader = libfields.Reader(source, settings)
+ self.__reader = libfields.Reader(source, fmt, settings)
self.__record = libfields.Record(settings)
except ValueError as e:
raise Error(str(e))
@@ -54,22 +54,19 @@ def next(self):
return [self.__record.field(i) for i in xrange(self.__record.size())]
-def _settings(kwargs):
+def _parse(kwargs):
def as_char(value):
return value or '\0'
def as_int(value):
return int(bool(value))
- delimiter = kwargs.get('delimiter', ',')
- quote = kwargs.get('quotechar', '"')
- expand = kwargs.get('_expand', True)
- source_buffer_size = kwargs.get('_source_buffer_size', 4 * 1024)
- record_buffer_size = kwargs.get('_record_buffer_size', 1024 * 1024)
- record_max_fields = kwargs.get('_record_max_fields', 1023)
- return libfields.Settings(
- as_char(delimiter),
- as_char(quote),
- as_int(expand),
- source_buffer_size,
- record_buffer_size,
- record_max_fields
+ fmt = libfields.Format(
+ delimiter = as_char(kwargs.get('delimiter', ',')),
+ quote = as_char(kwargs.get('quotechar', '"'))
)
+ settings = libfields.Settings(
+ expand = as_int(kwargs.get('_expand', True)),
+ source_buffer_size = kwargs.get('_source_buffer_size', 4 * 1024),
+ record_buffer_size = kwargs.get('_record_buffer_size', 1024 * 1024),
+ record_max_fields = kwargs.get('_record_max_fields', 1023)
+ )
+ return (fmt, settings)
View
@@ -18,15 +18,18 @@ class Field(ctypes.Structure):
class Reader(object):
- def __init__(self, source, settings):
+ def __init__(self, source, fmt, settings):
try:
self.source = source
- self.c = so.fields_read_fd(self.source.fileno(), settings)
+ self.c = so.fields_read_fd(self.source.fileno(), fmt, settings)
except AttributeError:
self.source = str(source)
self.c = so.fields_read_buffer(self.source, len(self.source),
- settings)
+ fmt, settings)
if not self.c:
+ result = so.fields_format_error(fmt)
+ if result != 0:
+ raise ValueError(so.fields_format_strerror(result))
result = so.fields_settings_error(settings)
if result != 0:
raise ValueError(so.fields_settings_strerror(result))
@@ -72,10 +75,17 @@ def size(self):
Record_p = ctypes.c_void_p
+class Format(ctypes.Structure):
+ _fields_ = [
+ ('delimiter', ctypes.c_char),
+ ('quote', ctypes.c_char)
+ ]
+
+Format_p = ctypes.POINTER(Format)
+
+
class Settings(ctypes.Structure):
_fields_ = [
- ('delimiter', ctypes.c_char),
- ('quote', ctypes.c_char),
('expand', ctypes.c_int),
('source_buffer_size', ctypes.c_size_t),
('record_buffer_size', ctypes.c_size_t),
@@ -88,11 +98,12 @@ class Settings(ctypes.Structure):
so.fields_read_buffer.argtypes = [
ctypes.POINTER(ctypes.c_char),
ctypes.c_size_t,
+ Format_p,
Settings_p
]
so.fields_read_buffer.restype = Reader_p
-so.fields_read_fd.argtypes = [ ctypes.c_int, Settings_p ]
+so.fields_read_fd.argtypes = [ ctypes.c_int, Format_p, Settings_p ]
so.fields_read_fd.restype = Reader_p
so.fields_reader_free.argtypes = [ Reader_p ]
@@ -119,6 +130,12 @@ class Settings(ctypes.Structure):
so.fields_record_size.argtypes = [ Record_p ]
so.fields_record_size.restype = ctypes.c_size_t
+so.fields_format_error.argtypes = [ Format_p ]
+so.fields_format_error.restype = ctypes.c_int
+
+so.fields_format_strerror.argtypes = [ ctypes.c_int ]
+so.fields_format_strerror.restype = ctypes.c_char_p
+
so.fields_settings_error.argtypes = [ Settings_p ]
so.fields_settings_error.restype = ctypes.c_int
View
@@ -45,9 +45,9 @@ def test_lf_as_quote(self):
def test_equal_delimiter_and_quote(self):
self.assertFail('Bad quote character', delimiter=',', quotechar=',')
- def assertFail(self, message, **settings):
+ def assertFail(self, message, **kwargs):
try:
- fields.reader('', **settings)
+ fields.reader('', **kwargs)
self.fail()
except fields.Error as e:
self.assertEqual(str(e), message)
Oops, something went wrong.

0 comments on commit 350e2fe

Please sign in to comment.