Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Extract 'fields_format'

  • Loading branch information...
commit 350e2fe066c9c6cad737eec25eac69e35935c501 1 parent 96f116a
@jvirtanen authored
View
4 examples/yahoo-finance.c
@@ -20,11 +20,11 @@ main(void)
struct fields_reader *reader;
struct fields_record *record;
- reader = fields_read_file(stdin, &fields_csv);
+ reader = fields_read_file(stdin, &fields_csv, &fields_defaults);
if (reader == NULL)
die("fields_read_file");
- record = fields_record_alloc(&fields_csv);
+ record = fields_record_alloc(&fields_defaults);
if (record == NULL)
die("fields_record_alloc");
View
105 include/fields.h
@@ -50,28 +50,41 @@ extern "C" {
#define FIELDS_VERSION "0.5.0"
/*
- * Settings
- * --------
+ * Formats
+ * -------
*/
/*
- * Settings specify the input format and the reader and record configuration.
+ * The input format.
*/
-struct fields_settings;
+struct fields_format;
/*
- * Use comma-separated values (CSV) as the input format with the default
- * reader and record configuration. A comma (`,`) is used as the delimiter
- * and a double quote (`"`) for quoting.
+ * Comma-separated values (CSV) use a comma (`,`) as the delimiter and a
+ * double quote (`"`) for quoting.
*/
-extern const struct fields_settings fields_csv;
+extern const struct fields_format fields_csv;
/*
- * Use tab-separated values (TSV) as the input format with the default reader
- * and record configuration. A tab (`\t`) is used as the delimiter and quoting
+ * Tab-separated values (TSV) use a tab (`\t`) as the delimiter and quoting
* is disabled.
*/
-extern const struct fields_settings fields_tsv;
+extern const struct fields_format fields_tsv;
+
+/*
+ * Settings
+ * --------
+ */
+
+/*
+ * The settings for readers and records.
+ */
+struct fields_settings;
+
+/*
+ * The default settings.
+ */
+extern const struct fields_settings fields_defaults;
/*
* Fields
@@ -156,27 +169,30 @@ struct fields_reader;
/*
* Allocate a reader that reads from the specified buffer. The operation fails
- * if the settings are erroneous.
+ * if the input format or the settings are erroneous.
*
* - buffer: a buffer
* - buffer_size: size of the buffer
+ * - format: the input format
* - settings: the settings for the reader
*
* If successful, returns a reader object. Otherwise returns `NULL`.
*/
struct fields_reader *fields_read_buffer(const char *, size_t,
- const struct fields_settings *);
+ const struct fields_format *, const struct fields_settings *);
/*
* Allocate a reader that reads from the specified file. The operation fails
- * if the settings are erroneous.
+ * if the input format or the settings are erroneous.
*
* - file: a file
+ * - format: the input format
* - settings: the settings for the reader
*
* If successful, returns a reader object. Otherwise returns `NULL`.
*/
-struct fields_reader *fields_read_file(FILE *, const struct fields_settings *);
+struct fields_reader *fields_read_file(FILE *, const struct fields_format *,
+ const struct fields_settings *);
/*
* Deallocate the reader.
@@ -227,23 +243,58 @@ enum fields_reader_error
};
/*
- * Custom Settings
- * ---------------
+ * Custom Formats
+ * --------------
*/
-struct fields_settings
+struct fields_format
{
/*
* The delimiter character. Must not be `\n` or `\r`.
*/
- char delimiter;
+ char delimiter;
/*
* The quote character. Must not be `\n`, `\r` or the delimiter character.
* Set to `\0` to disable quoting.
*/
- char quote;
+ char quote;
+};
+/*
+ * Check whether the format is erroneous.
+ *
+ * - format: the input format
+ *
+ * Returns an error code if the format is erroneous. Otherwise returns zero.
+ */
+int fields_format_error(const struct fields_format *);
+
+/*
+ * Get a string representation of an error code.
+ *
+ * - error: an error code
+ *
+ * Returns a string representation of the error code.
+ */
+const char *fields_format_strerror(int);
+
+/*
+ * The error codes for the format.
+ */
+enum fields_format_error
+{
+ FIELDS_FORMAT_ERROR_DELIMITER = 1,
+ FIELDS_FORMAT_ERROR_QUOTE = 2
+};
+
+/*
+ * Custom Settings
+ * ---------------
+ */
+
+struct fields_settings
+{
/*
* Expand the record if needed. If true, whenever the limit for the record
* buffer size or for the maximum number of fields in a record is reached,
@@ -301,11 +352,9 @@ const char *fields_settings_strerror(int);
*/
enum fields_settings_error
{
- FIELDS_SETTINGS_ERROR_DELIMITER = 1,
- FIELDS_SETTINGS_ERROR_QUOTE = 2,
- FIELDS_SETTINGS_ERROR_SOURCE_BUFFER_SIZE = 3,
- FIELDS_SETTINGS_ERROR_RECORD_BUFFER_SIZE = 4,
- FIELDS_SETTINGS_ERROR_RECORD_MAX_FIELDS = 5
+ FIELDS_SETTINGS_ERROR_SOURCE_BUFFER_SIZE = 1,
+ FIELDS_SETTINGS_ERROR_RECORD_BUFFER_SIZE = 2,
+ FIELDS_SETTINGS_ERROR_RECORD_MAX_FIELDS = 3
};
/*
@@ -335,17 +384,19 @@ typedef void fields_source_free_fn(void *);
/*
* Allocate a reader for the specified source. The operation fails if the
- * settings are erroneous.
+ * input format or the settings are erroneous.
*
* - source: the source object
* - read: the read method
* - free: the free method
+ * - format: the input format
* - settings: the settings for the reader
*
* If successful, returns a reader object. Otherwise returns `NULL`.
*/
struct fields_reader *fields_reader_alloc(void *, fields_source_read_fn *,
- fields_source_free_fn *, const struct fields_settings *);
+ fields_source_free_fn *, const struct fields_format *,
+ const struct fields_settings *);
#ifdef __cplusplus
}
View
6 include/fields_posix.h
@@ -40,14 +40,16 @@ extern "C" {
/*
* Allocate a reader that reads from the specified file descriptor. The
- * operation fails if the settings are erroneous.
+ * operation fails if the input format or the settings are erroneous.
*
* - fd: a file descriptor
+ * - format: the input format
* - settings: the settings for the reader
*
* If successful, returns a reader object. Otherwise returns `NULL`.
*/
-struct fields_reader *fields_read_fd(int, const struct fields_settings *);
+struct fields_reader *fields_read_fd(int, const struct fields_format *,
+ const struct fields_settings *);
#ifdef __cplusplus
}
View
29 python/fields/csv.py
@@ -33,9 +33,9 @@ def reader(source, **kwargs):
class Reader(object):
def __init__(self, source, **kwargs):
- settings = _settings(kwargs)
+ fmt, settings = _parse(kwargs)
try:
- self.__reader = libfields.Reader(source, settings)
+ self.__reader = libfields.Reader(source, fmt, settings)
self.__record = libfields.Record(settings)
except ValueError as e:
raise Error(str(e))
@@ -54,22 +54,19 @@ def next(self):
return [self.__record.field(i) for i in xrange(self.__record.size())]
-def _settings(kwargs):
+def _parse(kwargs):
def as_char(value):
return value or '\0'
def as_int(value):
return int(bool(value))
- delimiter = kwargs.get('delimiter', ',')
- quote = kwargs.get('quotechar', '"')
- expand = kwargs.get('_expand', True)
- source_buffer_size = kwargs.get('_source_buffer_size', 4 * 1024)
- record_buffer_size = kwargs.get('_record_buffer_size', 1024 * 1024)
- record_max_fields = kwargs.get('_record_max_fields', 1023)
- return libfields.Settings(
- as_char(delimiter),
- as_char(quote),
- as_int(expand),
- source_buffer_size,
- record_buffer_size,
- record_max_fields
+ fmt = libfields.Format(
+ delimiter = as_char(kwargs.get('delimiter', ',')),
+ quote = as_char(kwargs.get('quotechar', '"'))
)
+ settings = libfields.Settings(
+ expand = as_int(kwargs.get('_expand', True)),
+ source_buffer_size = kwargs.get('_source_buffer_size', 4 * 1024),
+ record_buffer_size = kwargs.get('_record_buffer_size', 1024 * 1024),
+ record_max_fields = kwargs.get('_record_max_fields', 1023)
+ )
+ return (fmt, settings)
View
29 python/fields/libfields.py
@@ -18,15 +18,18 @@ class Field(ctypes.Structure):
class Reader(object):
- def __init__(self, source, settings):
+ def __init__(self, source, fmt, settings):
try:
self.source = source
- self.c = so.fields_read_fd(self.source.fileno(), settings)
+ self.c = so.fields_read_fd(self.source.fileno(), fmt, settings)
except AttributeError:
self.source = str(source)
self.c = so.fields_read_buffer(self.source, len(self.source),
- settings)
+ fmt, settings)
if not self.c:
+ result = so.fields_format_error(fmt)
+ if result != 0:
+ raise ValueError(so.fields_format_strerror(result))
result = so.fields_settings_error(settings)
if result != 0:
raise ValueError(so.fields_settings_strerror(result))
@@ -72,10 +75,17 @@ def size(self):
Record_p = ctypes.c_void_p
+class Format(ctypes.Structure):
+ _fields_ = [
+ ('delimiter', ctypes.c_char),
+ ('quote', ctypes.c_char)
+ ]
+
+Format_p = ctypes.POINTER(Format)
+
+
class Settings(ctypes.Structure):
_fields_ = [
- ('delimiter', ctypes.c_char),
- ('quote', ctypes.c_char),
('expand', ctypes.c_int),
('source_buffer_size', ctypes.c_size_t),
('record_buffer_size', ctypes.c_size_t),
@@ -88,11 +98,12 @@ class Settings(ctypes.Structure):
so.fields_read_buffer.argtypes = [
ctypes.POINTER(ctypes.c_char),
ctypes.c_size_t,
+ Format_p,
Settings_p
]
so.fields_read_buffer.restype = Reader_p
-so.fields_read_fd.argtypes = [ ctypes.c_int, Settings_p ]
+so.fields_read_fd.argtypes = [ ctypes.c_int, Format_p, Settings_p ]
so.fields_read_fd.restype = Reader_p
so.fields_reader_free.argtypes = [ Reader_p ]
@@ -119,6 +130,12 @@ class Settings(ctypes.Structure):
so.fields_record_size.argtypes = [ Record_p ]
so.fields_record_size.restype = ctypes.c_size_t
+so.fields_format_error.argtypes = [ Format_p ]
+so.fields_format_error.restype = ctypes.c_int
+
+so.fields_format_strerror.argtypes = [ ctypes.c_int ]
+so.fields_format_strerror.restype = ctypes.c_char_p
+
so.fields_settings_error.argtypes = [ Settings_p ]
so.fields_settings_error.restype = ctypes.c_int
View
4 python/test_fields.py
@@ -45,9 +45,9 @@ def test_lf_as_quote(self):
def test_equal_delimiter_and_quote(self):
self.assertFail('Bad quote character', delimiter=',', quotechar=',')
- def assertFail(self, message, **settings):
+ def assertFail(self, message, **kwargs):
try:
- fields.reader('', **settings)
+ fields.reader('', **kwargs)
self.fail()
except fields.Error as e:
self.assertEqual(str(e), message)
View
128 src/fields.c
@@ -66,7 +66,7 @@ static const char *fields_reader_end(const struct fields_reader *);
static int fields_reader_fill(struct fields_reader *);
static void fields_reader_skip(struct fields_reader *);
-static fields_parse_fn *fields_settings_parser(const struct fields_settings *);
+static fields_parse_fn *fields_format_parser(const struct fields_format *);
static int fields_parse_unquoted(struct fields_reader *,
struct fields_record *);
@@ -292,7 +292,7 @@ fields_record_normalize(struct fields_record *self)
struct fields_reader *
fields_read_buffer(const char *buffer, size_t buffer_size,
- const struct fields_settings *settings)
+ const struct fields_format *format, const struct fields_settings *settings)
{
struct fields_reader *reader;
struct fields_buffer *source;
@@ -302,7 +302,7 @@ fields_read_buffer(const char *buffer, size_t buffer_size,
return NULL;
reader = fields_reader_alloc(source, &fields_buffer_read,
- &fields_buffer_free, settings);
+ &fields_buffer_free, format, settings);
if (reader == NULL) {
fields_buffer_free(source);
return NULL;
@@ -312,7 +312,8 @@ fields_read_buffer(const char *buffer, size_t buffer_size,
}
struct fields_reader *
-fields_read_file(FILE *file, const struct fields_settings *settings)
+fields_read_file(FILE *file, const struct fields_format *format,
+ const struct fields_settings *settings)
{
struct fields_reader *reader;
struct fields_file *source;
@@ -322,7 +323,7 @@ fields_read_file(FILE *file, const struct fields_settings *settings)
return NULL;
reader = fields_reader_alloc(source, &fields_file_read, &fields_file_free,
- settings);
+ format, settings);
if (reader == NULL) {
fields_file_free(source);
return NULL;
@@ -333,10 +334,14 @@ fields_read_file(FILE *file, const struct fields_settings *settings)
struct fields_reader *
fields_reader_alloc(void *source, fields_source_read_fn *read_fn,
- fields_source_free_fn *free_fn, const struct fields_settings *settings)
+ fields_source_free_fn *free_fn, const struct fields_format *format,
+ const struct fields_settings *settings)
{
struct fields_reader *self;
+ if (fields_format_error(format) != 0)
+ return NULL;
+
if (fields_settings_error(settings) != 0)
return NULL;
@@ -347,9 +352,9 @@ fields_reader_alloc(void *source, fields_source_read_fn *read_fn,
self->source = source;
self->source_read = read_fn;
self->source_free = free_fn;
- self->delimiter = settings->delimiter;
- self->quote = settings->quote;
- self->parse = fields_settings_parser(settings);
+ self->delimiter = format->delimiter;
+ self->quote = format->quote;
+ self->parse = fields_format_parser(format);
self->buffer = NULL;
self->buffer_size = 0;
self->cursor = NULL;
@@ -444,48 +449,86 @@ fields_reader_strerror(int error)
}
/*
- * Settings
- * ========
+ * Formats
+ * =======
*/
-const struct fields_settings fields_csv =
+const struct fields_format fields_csv =
{
- .delimiter = ',',
- .quote = '"',
- .expand = true,
- .source_buffer_size = FIELDS_DEFAULT_SOURCE_BUFFER_SIZE,
- .record_buffer_size = FIELDS_DEFAULT_RECORD_BUFFER_SIZE,
- .record_max_fields = FIELDS_DEFAULT_RECORD_MAX_FIELDS
+ .delimiter = ',',
+ .quote = '"'
};
-const struct fields_settings fields_tsv =
+const struct fields_format fields_tsv =
{
- .delimiter = '\t',
- .quote = '\0',
- .expand = true,
- .source_buffer_size = FIELDS_DEFAULT_SOURCE_BUFFER_SIZE,
- .record_buffer_size = FIELDS_DEFAULT_RECORD_BUFFER_SIZE,
- .record_max_fields = FIELDS_DEFAULT_RECORD_MAX_FIELDS
+ .delimiter = '\t',
+ .quote = '\0'
};
int
-fields_settings_error(const struct fields_settings *settings)
+fields_format_error(const struct fields_format *format)
{
- if (settings->delimiter == '\n')
- return FIELDS_SETTINGS_ERROR_DELIMITER;
+ if (format->delimiter == '\n')
+ return FIELDS_FORMAT_ERROR_DELIMITER;
+
+ if (format->delimiter == '\r')
+ return FIELDS_FORMAT_ERROR_DELIMITER;
+
+ if (format->quote == '\n')
+ return FIELDS_FORMAT_ERROR_QUOTE;
+
+ if (format->quote == '\r')
+ return FIELDS_FORMAT_ERROR_QUOTE;
+
+ if (format->quote == format->delimiter)
+ return FIELDS_FORMAT_ERROR_QUOTE;
+
+ return 0;
+}
+
+const char *
+fields_format_strerror(int error)
+{
+ switch (error) {
+ case FIELDS_FORMAT_ERROR_DELIMITER:
+ return "Bad field delimiter";
+ case FIELDS_FORMAT_ERROR_QUOTE:
+ return "Bad quote character";
+ case 0:
+ return "";
+ default:
+ break;
+ }
- if (settings->delimiter == '\r')
- return FIELDS_SETTINGS_ERROR_DELIMITER;
+ return "Unknown error";
+}
- if (settings->quote == '\n')
- return FIELDS_SETTINGS_ERROR_QUOTE;
+static fields_parse_fn *
+fields_format_parser(const struct fields_format *format)
+{
+ if (format->quote != '\0')
+ return &fields_parse_quoted;
+ else
+ return &fields_parse_unquoted;
+}
- if (settings->quote == '\r')
- return FIELDS_SETTINGS_ERROR_QUOTE;
- if (settings->quote == settings->delimiter)
- return FIELDS_SETTINGS_ERROR_QUOTE;
+/*
+ * Settings
+ * ========
+ */
+const struct fields_settings fields_defaults =
+{
+ .expand = true,
+ .source_buffer_size = FIELDS_DEFAULT_SOURCE_BUFFER_SIZE,
+ .record_buffer_size = FIELDS_DEFAULT_RECORD_BUFFER_SIZE,
+ .record_max_fields = FIELDS_DEFAULT_RECORD_MAX_FIELDS
+};
+
+int
+fields_settings_error(const struct fields_settings *settings)
+{
if (settings->source_buffer_size < FIELDS_MINIMUM_SOURCE_BUFFER_SIZE)
return FIELDS_SETTINGS_ERROR_SOURCE_BUFFER_SIZE;
@@ -502,10 +545,6 @@ const char *
fields_settings_strerror(int error)
{
switch (error) {
- case FIELDS_SETTINGS_ERROR_DELIMITER:
- return "Bad field delimiter";
- case FIELDS_SETTINGS_ERROR_QUOTE:
- return "Bad quote character";
case FIELDS_SETTINGS_ERROR_SOURCE_BUFFER_SIZE:
return "Too low source buffer size";
case FIELDS_SETTINGS_ERROR_RECORD_BUFFER_SIZE:
@@ -521,15 +560,6 @@ fields_settings_strerror(int error)
return "Unknown error";
}
-static fields_parse_fn *
-fields_settings_parser(const struct fields_settings *settings)
-{
- if (settings->quote != '\0')
- return &fields_parse_quoted;
- else
- return &fields_parse_unquoted;
-}
-
/*
* Parsers
* =======
View
5 src/fields_posix.c
@@ -39,7 +39,8 @@ static int fields_fd_read(void *, const char **, size_t *);
static void fields_fd_free(void *);
struct fields_reader *
-fields_read_fd(int fd, const struct fields_settings *settings)
+fields_read_fd(int fd, const struct fields_format *format,
+ const struct fields_settings *settings)
{
struct fields_reader *reader;
struct fields_fd *source;
@@ -49,7 +50,7 @@ fields_read_fd(int fd, const struct fields_settings *settings)
return NULL;
reader = fields_reader_alloc(source, &fields_fd_read, &fields_fd_free,
- settings);
+ format, settings);
if (reader == NULL) {
fields_fd_free(source);
return NULL;
Please sign in to comment.
Something went wrong with that request. Please try again.