Permalink
Browse files

fuzzymunge: simplify files on command line

The whole list of arbitrary input and output files was nice
and flexible, but it made simple use somewhat of a pain.
Instead, let's provide sed-style options: either you dump
input files to stdout, or you modify files in-place. That
gives us a nice clean:

  fuzzymunge -b base <in.list >out.list

or

  fuzzymunge -i *.list <base

for the most common use cases.
  • Loading branch information...
1 parent e23ab1b commit c3ffe8a01db04aa60efed3562d2b1e4af5727128 @peff committed Sep 29, 2010
Showing with 64 additions and 27 deletions.
  1. +7 −9 Documentation/fuzzymunge.txt
  2. +18 −18 fuzzymunge.c
  3. +32 −0 util.c
  4. +7 −0 util.h
@@ -8,7 +8,7 @@ fuzzymunge - Convert inputs lines to fuzzy matches
SYNOPSIS
--------
[verse]
-fuzzymunge [limitopts] [-b <base>] [-i <input> ...] [-o <output> ...]
+fuzzymunge [limitopts] [-b <base>] [-i] [<files...>]
DESCRIPTION
-----------
@@ -24,15 +24,13 @@ Input/Output Options
-b <file>:: Read base records from <file> instead of from stdin.
--i <file>:: Read input records from <file> instead of from stdin.
+-i:: Edit files in-place.
--o <file>:: Write output to <file> instead of to stdout.
-
-Multiple input and output files can be given. If both are given, the
-number of input and output files must be equal. The contents of the
-first input file are munged into the first output file, and so on.
-This can be more efficient than invoking fuzzymunge multiple times, as
-the base records need only be read and sorted once.
+<files...>::
+A list of files to read as input. If no files are given, input is
+read from stdin and the munged contents are written to stdout. If
+`-i` is not used, the munged contents of each file are written to
+stdout. If `-i` is used, each file is read, written to a tempfile,
+and the tempfile atomically renamed over the original.
include::limit-options.txt[]
View
@@ -8,7 +8,7 @@
/* Report the command-line usage synopsis through die(). */
void die_usage(void) {
die("%s",
"usage: fuzzymunge [-v] [-l limit] [-d limit]\n"
-" [-b base] [-i input] [-o output]\n"
+" [-b base] [-i] [<files...>]\n"
);
}
@@ -36,13 +36,11 @@ int main(int argc, char **argv)
struct trie t;
struct util_limit ul = UTIL_LIMIT_INIT;
struct util_array bases = UTIL_ARRAY_INIT;
- struct util_array inputs = UTIL_ARRAY_INIT;
- struct util_array outputs = UTIL_ARRAY_INIT;
+ int inplace = 0;
- while ((opt = getopt(argc, argv, "b:i:o:" UTIL_LIMIT_OPTS)) != -1) {
+ while ((opt = getopt(argc, argv, "b:i" UTIL_LIMIT_OPTS)) != -1) {
if (opt == 'b') util_array_push(&bases, optarg);
- else if(opt == 'i') util_array_push(&inputs, optarg);
- else if(opt == 'o') util_array_push(&outputs, optarg);
+ else if(opt == 'i') inplace = 1;
else if (!util_limit_opt(&ul, opt, optarg))
die_usage();
}
@@ -58,18 +56,20 @@ int main(int argc, char **argv)
util_trie_read(&t, util_open(bases.d[i], "r"));
}
- if (!inputs.len) {
- if (outputs.len > 1)
- die("can only use one output when reading from stdin");
- munge(&t, stdin, outputs.len ? util_open(outputs.d[0], "w") : stdout, &ul);
- }
- else {
- unsigned i;
- if (outputs.len && outputs.len != inputs.len)
- die("number of inputs and outputs must match");
- for (i = 0; i < inputs.len; i++)
- munge(&t, util_open(inputs.d[i], "r"),
- outputs.len ? util_open(outputs.d[i], "w") : stdout, &ul);
+ if (!*argv)
+ munge(&t, stdin, stdout, &ul);
+
+ for (; *argv; argv++) {
+ FILE *in = util_open(*argv, "r");
+
+ if (inplace) {
+ struct util_atomicfile tmp;
+ util_atomicfile_open(&tmp, *argv);
+ munge(&t, in, tmp.fh, &ul);
+ util_atomicfile_close(&tmp, *argv);
+ }
+ else
+ munge(&t, in, stdout, &ul);
}
return 0;
View
32 util.c
@@ -3,6 +3,9 @@
#include <string.h>
#include <errno.h>
#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
void die(const char *fmt, ...)
{
@@ -108,3 +111,32 @@ void util_trie_read(struct trie *t, FILE *in)
util_trie_insert(t, line);
}
}
+
+/*
+ * Create a temporary file named "<base>.XXXXXX" (via mkstemp) and open it
+ * for writing. On return af->fn holds the generated name and af->fh the
+ * write stream. If 'base' already exists, its permission bits are copied
+ * to the tempfile; a missing 'base' (ENOENT) is not an error.
+ *
+ * Every failure is reported through die()/die_errno(), which presumably
+ * do not return -- confirm against their definitions.
+ */
+void util_atomicfile_open(struct util_atomicfile *af, const char *base)
+{
+	int fd;
+	int len;
+	struct stat st;
+
+	len = snprintf(af->fn, sizeof(af->fn), "%s.XXXXXX", base);
+	/* a truncated template would lose its XXXXXX suffix and confuse mkstemp */
+	if (len < 0 || (size_t)len >= sizeof(af->fn))
+		die("tempfile name for '%s' is too long", base);
+
+	fd = mkstemp(af->fn);
+	if (fd < 0)
+		die_errno("unable to create tempfile '%s.XXXXXX'", base);
+
+	af->fh = fdopen(fd, "w");
+	/* fdopen can fail (e.g. ENOMEM); do not hand a NULL stream to callers */
+	if (!af->fh)
+		die_errno("unable to open tempfile '%s'", af->fn);
+
+	if (stat(base, &st) < 0) {
+		if (errno != ENOENT)
+			die_errno("unable to stat %s", base);
+	}
+	else {
+		/* pass only permission bits; st_mode also carries file-type bits */
+		if (chmod(af->fn, st.st_mode & 07777) < 0)
+			die_errno("unable to chmod %s", af->fn);
+	}
+}
+
+/*
+ * Finish an atomic write: close the tempfile stream af->fh, then rename
+ * the tempfile af->fn over 'dest'. Failures go through die_errno().
+ */
+void util_atomicfile_close(struct util_atomicfile *af, const char *dest)
+{
+	const char *tmpname = af->fn;
+
+	/* fclose() flushes buffered output, so a failure here is a write error */
+	if (fclose(af->fh) != 0)
+		die_errno("error writing to %s", tmpname);
+
+	if (rename(tmpname, dest) != 0)
+		die_errno("unable to rename %s to %s", tmpname, dest);
+}
View
7 util.h
@@ -30,4 +30,11 @@ struct util_array {
#define UTIL_ARRAY_INIT { NULL, 0, 0 }
void util_array_push(struct util_array *, const char *);
+struct util_atomicfile { /* state for writing a file via tempfile + rename */
+ FILE *fh; /* write stream on the tempfile, set by util_atomicfile_open() */
+ char fn[4096]; /* tempfile name: "<base>.XXXXXX" template filled in by mkstemp() */
+};
+void util_atomicfile_open(struct util_atomicfile *af, const char *base); /* create and open the tempfile for base */
+void util_atomicfile_close(struct util_atomicfile *af, const char *dest); /* close the tempfile and rename it to dest */
+
#endif /* UTIL_H */

0 comments on commit c3ffe8a

Please sign in to comment.