Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Echo: add --trim-columns, and tests for --contiguous and --trim-columns #167

Merged
merged 3 commits into from
Apr 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 84 additions & 6 deletions app/echo.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include <zsv/utils/compiler.h>
#include <zsv/utils/writer.h>
#include <zsv/utils/file.h>
#include <zsv/utils/string.h>
#include <zsv/utils/mem.h>

Expand Down Expand Up @@ -52,9 +53,13 @@ struct zsv_echo_data {

unsigned char *skip_until_prefix;
size_t skip_until_prefix_len;

char *tmp_fn;
unsigned max_nonempty_cols;
unsigned char trim_white:1;
unsigned char trim_columns:1;
unsigned char contiguous:1;
unsigned char _:6;
unsigned char _:5;
};

/**
Expand Down Expand Up @@ -87,10 +92,28 @@ void zsv_echo_get_next_overwrite(struct zsv_echo_data *data) {
}
}

/**
 * Row handler used for the --trim-columns pre-scan.
 *
 * For the current row, finds the 1-based position of the right-most cell
 * that has a non-zero length (measured after whitespace trimming when
 * data->trim_white is set), and raises data->max_nonempty_cols to that
 * position if it is larger than the value recorded so far.
 *
 * @param hook pointer to the struct zsv_echo_data context of the parse
 */
static void zsv_echo_get_max_nonempty_cols(void *hook) {
  struct zsv_echo_data *data = hook;
  const size_t cell_count = zsv_cell_count(data->parser);

  // 1-based position of the last non-empty cell seen in this row; 0 if none
  unsigned last_nonempty = 0;

  size_t col = 0;
  while(col < cell_count) {
    struct zsv_cell cell = zsv_get_cell(data->parser, col);
    col++; // from here on, col is the 1-based position of `cell`

    // with --trim, a cell holding only whitespace counts as empty
    if(UNLIKELY(data->trim_white))
      cell.str = (unsigned char *)zsv_strtrim(cell.str, &cell.len);

    if(cell.len != 0)
      last_nonempty = col;
  }

  // keep the running maximum across all rows scanned so far
  if(last_nonempty > data->max_nonempty_cols)
    data->max_nonempty_cols = last_nonempty;
}

static void zsv_echo_row(void *hook) {
struct zsv_echo_data *data = hook;
size_t j = zsv_cell_count(data->parser);
if(UNLIKELY(data->trim_columns && j > data->max_nonempty_cols))
j = data->max_nonempty_cols;

if(VERY_UNLIKELY(data->row_ix == 0)) { // header
for(size_t i = 0, j = zsv_cell_count(data->parser); i < j; i++) {
for(size_t i = 0; i < j; i++) {
struct zsv_cell cell = zsv_get_cell(data->parser, i);
if(UNLIKELY(data->trim_white))
cell.str = (unsigned char *)zsv_strtrim(cell.str, &cell.len);
Expand All @@ -99,7 +122,7 @@ static void zsv_echo_row(void *hook) {
} else if(VERY_UNLIKELY(data->contiguous && zsv_row_is_blank(data->parser))) {
zsv_abort(data->parser);
} else {
for(size_t i = 0, j = zsv_cell_count(data->parser); i < j; i++) {
for(size_t i = 0; i < j; i++) {
if(VERY_UNLIKELY(data->overwrite.row_ix == data->row_ix && data->overwrite.col_ix == i)) {
zsv_writer_cell(data->csv_writer, i == 0, data->overwrite.str, data->overwrite.len, 1);
zsv_echo_get_next_overwrite(data);
Expand Down Expand Up @@ -135,6 +158,7 @@ const char *zsv_echo_usage_msg[] = {
"Options:",
" -b : output with BOM",
" --trim : trim whitespace",
" --trim-columns : trim blank columns",
" --contiguous : stop output upon scanning an entire row of blank values",
" --skip-until <value>: ignore all leading rows until the first row whose first column starts with the given value ",
" --overwrite <source>: overwrite cells using given source. Source may be:",
Expand All @@ -161,6 +185,11 @@ static void zsv_echo_cleanup(struct zsv_echo_data *data) {
fclose(data->in);
if(data->o.sqlite3.db)
sqlite3_close(data->o.sqlite3.db);

if(data->tmp_fn) {
remove(data->tmp_fn);
free(data->tmp_fn);
}
}

#define zsv_echo_sqlite3_prefix "sqlite3://"
Expand Down Expand Up @@ -235,6 +264,8 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
writer_opts.with_bom = 1;
else if(!strcmp(arg, "--contiguous"))
data.contiguous = 1;
else if(!strcmp(arg, "--trim-columns"))
data.trim_columns = 1;
else if(!strcmp(arg, "--trim"))
data.trim_white = 1;
else if(!strcmp(arg, "--skip-until")) {
Expand Down Expand Up @@ -294,10 +325,57 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
return 1;
}

unsigned char buff[4096];
if(data.skip_until_prefix)
opts->row_handler = zsv_echo_row_skip_until;
else
else {
if(data.trim_columns) {
// first, save the file if it is stdin
if(data.in == stdin) {
if(!(data.tmp_fn = zsv_get_temp_filename("zsv_echo_XXXXXXXX"))) {
zsv_echo_cleanup(&data);
return 1;
}

FILE *f = fopen(data.tmp_fn, "wb");
if(!f) {
perror(data.tmp_fn);
zsv_echo_cleanup(&data);
return 1;
} else {
size_t bytes_read;
while((bytes_read = fread(buff, 1, sizeof(buff), data.in)) > 0)
fwrite(buff, 1, bytes_read, f);
fclose(f);
if(!(data.in = fopen(data.tmp_fn, "rb"))) {
perror(data.tmp_fn);
zsv_echo_cleanup(&data);
return 1;
}
}
}
// next, determine the max number of columns from the left that contains data
struct zsv_opts tmp_opts = *opts;
tmp_opts.row_handler = zsv_echo_get_max_nonempty_cols;
tmp_opts.stream = data.in;
tmp_opts.ctx = &data;
if(zsv_new_with_properties(&tmp_opts, custom_prop_handler, data.input_path, opts_used, &data.parser) != zsv_status_ok) {
zsv_echo_cleanup(&data);
return 1;
} else {
// find the max nonempty col count
enum zsv_status status;
while(!zsv_signal_interrupted && (status = zsv_parse_more(data.parser)) == zsv_status_ok) ;
zsv_finish(data.parser);
zsv_delete(data.parser);
data.parser = NULL;

// re-open the input again
data.in = fopen(data.tmp_fn ? data.tmp_fn : data.input_path, "rb");
}
}
opts->row_handler = zsv_echo_row;
}
opts->stream = data.in;
opts->ctx = &data;
data.csv_writer = zsv_writer_new(&writer_opts);
Expand All @@ -320,8 +398,8 @@ int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *op
}

// create a local csv writer buff for faster performance
unsigned char writer_buff[64];
zsv_writer_set_temp_buff(data.csv_writer, writer_buff, sizeof(writer_buff));
// unsigned char writer_buff[64];
zsv_writer_set_temp_buff(data.csv_writer, buff, sizeof(buff));

// process the input data.
zsv_handle_ctrl_c_signal();
Expand Down
17 changes: 16 additions & 1 deletion app/test/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ test: ${TESTS}
test-prop:
EXE=${BUILD_DIR}/bin/zsv_prop${EXE} make -C prop test

test-echo : test-echo1 test-echo-overwrite test-echo-eol test-echo-overwrite-csv test-echo-chars test-echo-trim test-echo-skip-until
test-echo : test-echo1 test-echo-overwrite test-echo-eol test-echo-overwrite-csv test-echo-chars test-echo-trim test-echo-skip-until test-echo-contiguous test-echo-trim-columns test-echo-trim-columns-2

test-echo1: ${BUILD_DIR}/bin/zsv_echo${EXE}
@${TEST_INIT}
Expand All @@ -124,6 +124,21 @@ test-echo-skip-until: ${BUILD_DIR}/bin/zsv_echo${EXE}
@${PREFIX} $< --skip-until ASF ${TEST_DATA_DIR}/test/echo-skip-until.csv ${REDIRECT} ${TMP_DIR}/$@.out
@${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL}

test-echo-contiguous: ${BUILD_DIR}/bin/zsv_echo${EXE}
@${TEST_INIT}
@${PREFIX} $< --contiguous ${TEST_DATA_DIR}/test/../../data/test/echo-contiguous.csv ${REDIRECT} ${TMP_DIR}/$@.out
@${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL}

test-echo-trim-columns: ${BUILD_DIR}/bin/zsv_echo${EXE}
@${TEST_INIT}
@${PREFIX} $< --trim-columns ${TEST_DATA_DIR}/test/echo-trim-columns.csv ${REDIRECT} ${TMP_DIR}/$@.out
@${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL}

test-echo-trim-columns-2: ${BUILD_DIR}/bin/zsv_echo${EXE}
@${TEST_INIT}
@${PREFIX} $< --trim --trim-columns ${TEST_DATA_DIR}/test/echo-trim-columns.csv ${REDIRECT} ${TMP_DIR}/$@.out
@${CMP} ${TMP_DIR}/$@.out expected/$@.out && ${TEST_PASS} || ${TEST_FAIL}

test-echo-chars: ${BUILD_DIR}/bin/zsv_echo${EXE}
@${TEST_INIT}
@${PREFIX} echo '東京都' | $< -u '?' ${REDIRECT} ${TMP_DIR}/$@.out
Expand Down
2 changes: 2 additions & 0 deletions app/test/expected/test-echo-contiguous.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
abc,def,,,,
1,2,3,,,,
4 changes: 4 additions & 0 deletions app/test/expected/test-echo-trim-columns-2.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
abc,def,,
1,2,3,
4,,,
5,6,7,8
4 changes: 4 additions & 0 deletions app/test/expected/test-echo-trim-columns.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
abc,def,,,
1,2,3,,
4,,,,
5,6,7,8,
4 changes: 4 additions & 0 deletions data/test/echo-contiguous.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
abc,def,,,,
1,2,3,,,,
,,,,,,,,
5,6,7,8,,,,
4 changes: 4 additions & 0 deletions data/test/echo-trim-columns.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
abc,def,,,,
1,2,3,, ,,
4,,,,,,,
5,6,7,8,,,,