Browse files

added command-line program

  • Loading branch information...
1 parent da68b4c commit f70864f146250a2d82f8d89fbe493ed7a882f1b4 @stedolan committed Dec 7, 2009
Showing with 188 additions and 63 deletions.
  1. +4 −11 bzipindex.c
  2. +22 −49 bzipseek.c
  3. +19 −3 bzipseek.h → bzseek.h
  4. +143 −0 main.c
View
15 bzipindex.c
@@ -24,7 +24,7 @@
#include <unistd.h>
#include <sys/types.h>
-#include "bzipseek.h"
+#include "bzseek.h"
/* Constants for huffman coding */
#define MAX_GROUPS 6
@@ -461,9 +461,12 @@ bzseek_err bzseek_build_index(const char* src_name, const char* idx_name) {
if (idx_name){
src_fd = fopen(src_name, "r");
+ if (!src_fd) return BZSEEK_IO_ERR;
idx_fd = fopen(idx_name, "r+");
+ if (!idx_fd) return BZSEEK_IO_ERR;
}else{
src_fd = idx_fd = fopen(src_name, "r+");
+ if (!idx_fd) return BZSEEK_IO_ERR;
}
@@ -596,13 +599,3 @@ bzseek_err bzseek_build_index(const char* src_name, const char* idx_name) {
free(bd);
return err;
}
-
-
-/* Dumb little test thing, decompress stdin to stdout */
-int main(int argc, char *argv[]) {
- int err = bzseek_build_index(argv[1], NULL);
- if (err){
- fprintf(stderr, "Error: %s\n", bunzip_errors[-err]);
- }
- return 0;
-}
View
71 bzipseek.c
@@ -12,25 +12,10 @@
#include <errno.h>
-#include "bzipseek.h"
+#include "bzseek.h"
-struct bzseek_file{
- FILE* f_data;
- FILE* f_idx;
- int blocksz;
-
- int idx_nitems;
- uint64_t* idx_data;
-
-
- char* buf;
- int buflen, bufsize;
- int curr_block;
-
- bz_stream bz;
-};
// Poor man's exceptions :D
#define ATTEMPT(action) if ((err = action) != BZSEEK_OK) return err
@@ -71,7 +56,7 @@ static bzseek_err load_block(bzseek_file* f, uint64_t start, uint64_t end){
(bit_data[i+1] >> (8-start_off));
}
-
+ /* this could equally be caused by a bad index file */
if (memcmp(blk_header, "\x31\x41\x59\x26\x53\x59", 6))
return BZSEEK_BAD_DATA;
@@ -133,7 +118,7 @@ static uint64_t get_bz_uncomp_pos(bzseek_file* f){
return bzproduced + idx_uncomp_pos(f, f->curr_block);
}
-static bzseek_err run_bz(bzseek_file* f, int* count, int* len, char** buf){
+static bzseek_err run_bz(bzseek_file* f, int* count, unsigned int* len, char** buf){
f->bz.next_out = *buf;
f->bz.avail_out = *len;
int oldlen = *len;
@@ -254,12 +239,25 @@ static bzseek_err load_index(bzseek_file* f){
-bzseek_err bzseek_open(bzseek_file* file, FILE* data_file, FILE* idx_file){
- if (!data_file){
+bzseek_err bzseek_open(bzseek_file* file, const char* data_filename, const char* idx_filename){
+ if (!data_filename){
errno = EBADF;
return BZSEEK_IO_ERR;
}
- if (!idx_file) idx_file = data_file;
+
+ FILE* data_file = fopen(data_filename, "r");
+ if (!data_file) return BZSEEK_IO_ERR;
+
+ FILE* idx_file;
+ if (idx_filename){
+ idx_file = fopen(idx_filename, "r");
+ }else{
+ idx_file = data_file;
+ }
+ if (!idx_file) return BZSEEK_IO_ERR;
+
+
+
memset(file, 0, sizeof(file));
file->f_data = data_file;
@@ -292,7 +290,7 @@ uint64_t bzseek_len(bzseek_file* file){
#define NULL_BUF_SZ 1024
-bzseek_err bzseek_read(bzseek_file* file, uint64_t start, int len, char* buf){
+bzseek_err bzseek_read(bzseek_file* file, uint64_t start, unsigned int len, char* buf){
bzseek_err err = BZSEEK_OK;
/* loop in case the request spans multiple blocks */
@@ -323,7 +321,8 @@ bzseek_err bzseek_read(bzseek_file* file, uint64_t start, int len, char* buf){
int seek_forward = (int)(start - bzpos);
while (seek_forward > 0){
char* null_buf = devnull;
- int null_len = seek_forward > NULL_BUF_SZ ? NULL_BUF_SZ : seek_forward;
+ unsigned int null_len =
+ seek_forward > NULL_BUF_SZ ? NULL_BUF_SZ : seek_forward;
int cnt;
ATTEMPT(run_bz(file, &cnt, &null_len, &null_buf));
seek_forward -= cnt;
@@ -364,29 +363,3 @@ const char* bzseek_errmsg(bzseek_err err){
}
}
-int main(int argc, char* argv[]){
- // bunzip_block(9, open("testfile.bz2", O_RDONLY), 32, 6458889 /*6458866*/);
- // bunzip_block(9, open("test3.bz2", O_RDONLY), atoi(argv[1]), atoi(argv[2]));
- bzseek_file f;
- // f.f_data = f.f_idx = fopen("index","r");
- // load_index(&f);
- bzseek_err err;
- err = bzseek_open(&f, fopen("test4.bz2", "r"), NULL);
- if (err){
- printf("error opening: %s\n", bzseek_errmsg(err));
- return 0;
- }
- char x[100000];
- uint64_t start = atoi(argv[1]);
- int len = atoi(argv[2]);
- err = bzseek_read(&f, start, len, x);
-
- if (err == BZSEEK_EOF){
- len = bzseek_len(&f) - start;
- }else if (err){
- printf("error: %s\n" , bzseek_errmsg(err));
- }
- fwrite(x, 1, len, stdout);
- bzseek_close(&f);
- return 0;
-}
View
22 bzipseek.h → bzseek.h
@@ -26,19 +26,35 @@ bzseek_err bzseek_build_index(const char* src_name, const char* idx_name);
/* a seekable bzip file. All of the fields of this structure are private */
-typedef struct bzseek_file bzseek_file;
+typedef struct bzseek_file{ /* state of one open seekable bzip2 file; set up by bzseek_open */
+ FILE* f_data; /* stream of the bzip2 data file (bzseek_open stores the opened data file here) */
+ FILE* f_idx; /* stream the index is read from -- presumably equal to f_data when no separate index file is given; confirm in bzseek_open */
+
+ int blocksz; /* NOTE(review): presumably the bzip2 block size of the data file -- confirm */
+
+ int idx_nitems; /* NOTE(review): presumably the number of entries in idx_data -- confirm in load_index */
+ uint64_t* idx_data; /* NOTE(review): presumably the loaded index entries -- confirm layout in load_index */
+
+
+ char* buf; /* NOTE(review): presumably holds the data of the currently loaded block -- confirm in load_block */
+ int buflen, bufsize; /* NOTE(review): presumably bytes in use in buf / bytes allocated for buf -- confirm */
+ int curr_block; /* block number passed to idx_uncomp_pos() when computing the current uncompressed position */
+
+ bz_stream bz; /* libbz2 stream state; run_bz points its next_out/avail_out at the caller's buffer */
+} bzseek_file;
/* open a file, initialise a bzseek_file.
if idx_file is NULL, read the index directly from the bzip_data file */
-bzseek_err bzseek_open(bzseek_file* file, FILE* bzip_data, FILE* idx_file);
+bzseek_err bzseek_open(bzseek_file* file, const char* bzip_data, const char* idx_file);
/* return the length of the uncompressed file */
uint64_t bzseek_len(bzseek_file* file);
+
/* read a range of bytes into the supplied buffer.
if this returns BZSEEK_EOF, it means that end of file was reached and
only (bzseek_len(file) - start) bytes were written into the buffer */
-bzseek_err bzseek_read(bzseek_file* file, uint64_t start, int len, char* buf);
+bzseek_err bzseek_read(bzseek_file* file, uint64_t start, unsigned int len, char* buf);
/* get a string representation of an error message.
for BZSEEK_IO_ERR, errno will have more details */
View
143 main.c
@@ -0,0 +1,143 @@
+#define _ISOC99_SOURCE
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <ctype.h>
+#include <errno.h>
+#include "bzseek.h"
+
+
+/* Print the synopsis of both invocation forms (index generation and
+   range read) to stderr, then terminate the process with exit status 1.
+   This function does not return. */
+void usage(const char* progname){
+  static const char generate_form[] =
+    "%s <file.bz2> [-i indexfile] -g\n"
+    " -- generate an index for the bzip2 <file.bz2>\n";
+  static const char read_form[] =
+    "%s <file.bz2> [-i indexfile] <start> <len>\n"
+    " -- read <len> bytes starting at <start> from an indexed file\n"
+    " <start> and <len> may be decimal, 0xhex or 0oct\n";
+
+  fputs("\nUsage: \n", stderr);
+  fprintf(stderr, generate_form, progname);
+  fprintf(stderr, read_form, progname);
+  exit(1);
+}
+
+/* Report a failed operation on stderr in the form "<prog>: <msg>: <reason>".
+   prog -- program name used as the message prefix
+   msg  -- short description of what was being attempted
+   err  -- result code of the failed call
+   For BZSEEK_IO_ERR the reason comes from errno (printed by perror); for
+   every other error it is the text returned by bzseek_errmsg().  Nothing
+   is printed for BZSEEK_OK. */
+void describe(const char* prog, const char* msg, bzseek_err err){
+  /* Capture errno before any output: the fprintf below is permitted to
+     modify it, which would make perror print the wrong reason. */
+  int saved_errno = errno;
+
+  if (err == BZSEEK_IO_ERR){
+    fprintf(stderr, "%s: ", prog);
+    errno = saved_errno;
+    perror(msg);
+  }else if (err != BZSEEK_OK){
+    fprintf(stderr, "%s: %s: %s\n", prog, msg, bzseek_errmsg(err));
+  }
+}
+
+/* Generate-index mode: run bzseek_build_index() on `filename`, passing
+   `index` through as the index file name (main passes NULL when no -i
+   option was given).  Returns 0 on success; on failure the error is
+   reported through describe() and 1 is returned. */
+int do_build(const char* prog, const char* filename, const char* index){
+  bzseek_err status = bzseek_build_index(filename, index);
+  if (!status)
+    return 0;
+
+  describe(prog, "building index", status);
+  return 1;
+}
+
+#define BUFFER_SIZE 1024
+/* Read mode: copy `len` uncompressed bytes, beginning at uncompressed
+   offset `start` of the bzip2 file `filename`, to stdout.  The data is
+   fetched with bzseek_read() in pieces of at most BUFFER_SIZE bytes.
+   prog  -- program name, used as the prefix of diagnostics
+   index -- name of a separate index file (main passes NULL when no -i
+            option was given)
+   If bzseek_read() reports BZSEEK_EOF, a warning is printed, the bytes
+   that lie before the end of the data are written, and the function
+   still returns 0.  Any other failure is reported through describe()
+   and 1 is returned.
+   The bzseek_file is not closed here; main returns this function's
+   result directly, so the process ends right afterwards. */
+int do_read(const char* prog, const char* filename, const char* index, uint64_t start, unsigned int len){
+  char buf[BUFFER_SIZE];
+  bzseek_err err;
+  bzseek_file f;
+
+  err = bzseek_open(&f, filename, index);
+  if (err){describe(prog, "opening file", err); return 1;}
+
+  while (len > 0){
+    unsigned int chunk = len > BUFFER_SIZE ? BUFFER_SIZE : len;
+    err = bzseek_read(&f, start, chunk, buf);
+    if (err == BZSEEK_EOF){
+      fprintf(stderr, "%s: Warning: End-of-file reached during read.\n", prog);
+      /* Only the bytes between start and the end of the data are valid.
+         Clamp the count: a start at or beyond the end must give 0 rather
+         than a wrapped-around length, and the count must never exceed
+         what was requested into buf. */
+      uint64_t total = bzseek_len(&f);
+      uint64_t avail = start < total ? total - start : 0;
+      if (avail < chunk) chunk = (unsigned int)avail;
+      len = chunk; /* this is the last piece */
+    }else if (err){
+      describe(prog, "reading", err);
+      return 1;
+    }
+    /* A short write means stdout failed; fwrite leaves the cause in errno. */
+    if (fwrite(buf, 1, chunk, stdout) != chunk){
+      describe(prog, "writing output", BZSEEK_IO_ERR);
+      return 1;
+    }
+    len -= chunk;
+    start += chunk;
+  }
+  return 0;
+}
+
+/* Command-line entry point.
+   Options:  -i/--index <file>  use a separate index file
+             -g/--generate      build an index instead of reading
+   Positional arguments: <file.bz2> in generate mode, or
+   <file.bz2> <start> <len> in read mode (the default).
+   Returns the result of do_build() or do_read(); every argument error
+   ends in usage(), which exits with status 1. */
+int main(int argc, char* argv[]){
+  char* indexfile = NULL;
+  struct option longopts[] = {
+    {"index", 1, NULL, 'i'},
+    {"generate", 0, NULL, 'g'},
+    {0,0,0,0}
+  };
+  int c;
+  int mode = 0;   /* 0 = read a byte range, 1 = generate an index */
+  char* opts[3];  /* positional arguments in order of appearance */
+  int nopts = 0;
+  /* The leading '-' in the option string makes getopt_long return every
+     non-option argument as the argument of an option with code 1, so
+     positional arguments and options may be freely interleaved. */
+  while ((c = getopt_long(argc, argv, "-gi:", longopts, NULL)) != -1){
+    switch (c){
+    case 1:
+      if (nopts < 3) opts[nopts++] = optarg;
+      break;
+    case 'i':
+      indexfile = optarg;
+      break;
+    case 'g':
+      mode = 1;
+      break;
+    case '?':
+      if (optopt == 'i')
+        fprintf (stderr, "Option -%c requires an argument.\n", optopt);
+      else if (isprint (optopt))
+        fprintf (stderr, "Unknown option `-%c'.\n", optopt);
+      else
+        fprintf (stderr,
+                 "Unknown option character `\\x%x'.\n",
+                 optopt);
+      usage(argv[0]);
+      break;
+    default:
+      fprintf(stderr, "Error parsing options\n");
+      usage(argv[0]);
+    }
+  }
+
+  if (mode == 0){
+    if (nopts != 3){
+      fprintf(stderr, "Missing arguments\n");
+      usage(argv[0]);
+    }
+    char* filename = opts[0];
+    uint64_t start;
+    unsigned int len;
+
+    /* A number is accepted only if at least one digit was consumed, the
+       whole string was consumed, and the value did not overflow. */
+    char* endptr;
+    errno = 0;
+    start = strtoull(opts[1], &endptr, 0);
+    if (endptr == opts[1] || *endptr != '\0' || errno == ERANGE){
+      fprintf(stderr, "Can't parse %s as a starting position\n", opts[1]);
+      usage(argv[0]);
+    }
+    errno = 0;
+    unsigned long len_value = strtoul(opts[2], &endptr, 0);
+    len = (unsigned int)len_value;
+    /* len != len_value catches values that fit in unsigned long but would
+       be silently truncated when narrowed to unsigned int. */
+    if (endptr == opts[2] || *endptr != '\0' || errno == ERANGE || len != len_value){
+      fprintf(stderr, "Can't parse %s as a length\n", opts[2]);
+      usage(argv[0]);
+    }
+    return do_read(argv[0], filename, indexfile, start, len);
+  }else{
+    if (nopts < 1){
+      fprintf(stderr, "Missing filename\n");
+      usage(argv[0]);
+    }else if (nopts > 1){
+      fprintf(stderr, "Unexpected options\n");
+      usage(argv[0]);
+    }
+    const char* filename = opts[0];
+    return do_build(argv[0], filename, indexfile);
+  }
+}

0 comments on commit f70864f

Please sign in to comment.