Permalink
Browse files

Added field name uniqification

A user presented a table with non-unique field names. This change
ensures that all PostgreSQL columns have unique names, even if the XBase
input file does not.
  • Loading branch information...
1 parent 16ef003 commit d974e5deaef0c657765d16cb1c1050d37464e5fe @kstrauser committed Mar 24, 2012
Showing with 83 additions and 19 deletions.
  1. +69 −18 src/pgdbf.c
  2. +14 −1 src/pgdbf.h
View
@@ -73,21 +73,21 @@ int main(int argc, char **argv) {
char *t;
char *u;
int lastcharwasreplaced = 0;
-
- /* Datetime calculation stuff */
- int32_t juliandays;
- int32_t seconds;
- int hours;
- int minutes;
-
int i;
+ int j;
int isreservedname;
int printed;
size_t blocksread;
size_t longestfield = 32; /* Make sure we leave at least enough room
* to print out long formatted numbers, like
* currencies. */
+ /* Datetime calculation stuff */
+ int32_t juliandays;
+ int32_t seconds;
+ int hours;
+ int minutes;
+
/* Command line option parsing */
int opt;
int optexitcode = -1; /* Left at -1 means that the arguments were
@@ -108,7 +108,10 @@ int main(int argc, char **argv) {
/* Describing the PostgreSQL table */
char *tablename;
char *baretablename;
- char fieldname[11];
+ char (*fieldnames)[MAXCOLUMNNAMESIZE];
+ int isuniquename;
+ char basename[MAXCOLUMNNAMESIZE];
+ int serial;
/* Attempt to parse any command line arguments */
while((opt = getopt(argc, argv, "cCdDeEhm:nNpPqQtTuU")) != -1) {
@@ -386,6 +389,61 @@ int main(int argc, char **argv) {
printf(" %s; SET statement_timeout=0;\n", baretablename);
}
+ /* Uniqify the XBase field names. It's possible to have multiple fields
+ * with the same name, but PostgreSQL correctly considers that an error
+ * condition. */
+ if(optusecreatetable) {
+ fieldnames = calloc(fieldcount, MAXCOLUMNNAMESIZE);
+ if(fieldnames == NULL) {
+ exitwitherror("Unable to allocate the columnname uniqification buffer", 1);
+ }
+ for(fieldnum = 0; fieldnum < fieldcount; fieldnum++) {
+ /* Lowercase the field names to make PostgreSQL column names */
+ s = fields[fieldnum].name;
+ t = fieldnames[fieldnum];
+ while(*s) {
+ *t++ = tolower(*s++);
+ }
+ *t = '\0';
+ }
+ for(i = 1; i < fieldcount; i++) {
+ /* Search for duplicates in all the previously processed field names */
+ isuniquename = 1;
+ for(j = 0; j < i; j++) {
+ if(i != j && !strcmp(fieldnames[i], fieldnames[j])) {
+ isuniquename = 0;
+ break;
+ }
+ }
+ /* No duplicates? Move on to the next. */
+ if(isuniquename) {
+ continue;
+ }
+
+ /* Create a unique name by appending "_" plus an ever-increasing
+ * serial number to the end of the field name until it doesn't match
+ * any other field name. */
+ strcpy(basename, fieldnames[i]);
+ serial = 2;
+ while(!isuniquename) {
+ /* sprintf() is safe because it's impossible for the longest XBase
+ * field name plus an underscore plus a serial number (which can't
+ * be greater than 4 digits long because of XBase field count
+ * limits) plus the trailing \0 to be longer than
+ * MAXCOLUMNNAMESIZE. */
+ sprintf(fieldnames[i], "%s_%d", basename, serial);
+ isuniquename = 1;
+ for(j = 0; j < fieldcount; j++) {
+ if(j != i && !strcmp(fieldnames[i], fieldnames[j])) {
+ isuniquename = 0;
+ break;
+ }
+ }
+ serial++;
+ }
+ }
+ }
+
/* Generate the create table statement, do some sanity testing, and scan
* for a few additional output parameters. This is an ugly loop that
* does lots of stuff, but extracting it into two or more loops with the
@@ -403,25 +461,18 @@ int main(int argc, char **argv) {
printed = 1;
}
- s = fields[fieldnum].name;
- t = fieldname;
- while(*s) {
- *t++ = tolower(*s++);
- }
- *t = '\0';
-
if(optusecreatetable) {
/* If the fieldname is a reserved word, rename it to start with
* "tablename_" */
isreservedname = 0;
for(i = 0; RESERVEDWORDS[i]; i++ ) {
- if(!strcmp(fieldname, RESERVEDWORDS[i])) {
- printf("%s_%s ", tablename, fieldname);
+ if(!strcmp(fieldnames[fieldnum], RESERVEDWORDS[i])) {
+ printf("%s_%s ", tablename, fieldnames[fieldnum]);
isreservedname = 1;
break;
}
}
- if(!isreservedname) printf("%s ", fieldname);
+ if(!isreservedname) printf("%s ", fieldnames[fieldnum]);
}
switch(fields[fieldnum].type) {
View
@@ -40,6 +40,19 @@
#define NUMERICMEMOSTYLE 0
#define PACKEDMEMOSTYLE 1
+/* Don't edit this! It's defined in the XBase specification. */
+#define XBASEFIELDNAMESIZE 11
+
+/* This is the maximum size a generated PostgreSQL column name can possibly
+ * be. It's used when making unique versions of duplicated field names.
+ *
+ * 11 bytes for the maximum XBase field name length
+ * 1 byte for a "_" separator
+ * 5 bytes for the numeric "serial number" portion
+ * 1 byte for the trailing \0
+ */
+#define MAXCOLUMNNAMESIZE (XBASEFIELDNAMESIZE + 7)
+
static char staticbuf[STATICBUFFERSIZE + 1];
/* The list of reserved words that can't be used as column names, as per
@@ -149,7 +162,7 @@ typedef struct {
} DBFHEADER;
typedef struct {
- char name[11];
+ char name[XBASEFIELDNAMESIZE];
char type;
int32_t memaddress;
uint8_t length;

0 comments on commit d974e5d

Please sign in to comment.