Permalink
Browse files

Implement function used to seek within data blocks.

Seeking is fully implemented only for uncompressed RAR files. It is not
implemented for compressed RAR files or for the other formats (ZIP, TAR, etc.).
  • Loading branch information...
amejia1 committed Sep 22, 2012
1 parent 66b1a0b commit a5b03cc1c5c7c7af0f289324b799b7103d12f28b
Showing with 1,895 additions and 8 deletions.
  1. +9 −0 Makefile.am
  2. +3 −0 libarchive/archive.h
  3. +126 −1 libarchive/archive_read.c
  4. +9 −0 libarchive/archive_read_private.h
  5. +1 −0 libarchive/archive_read_support_format_7zip.c
  6. +1 −0 libarchive/archive_read_support_format_ar.c
  7. +1 −0 libarchive/archive_read_support_format_cab.c
  8. +1 −0 libarchive/archive_read_support_format_cpio.c
  9. +1 −0 libarchive/archive_read_support_format_empty.c
  10. +1 −0 libarchive/archive_read_support_format_iso9660.c
  11. +1 −0 libarchive/archive_read_support_format_lha.c
  12. +1 −1 libarchive/archive_read_support_format_mtree.c
  13. +151 −6 libarchive/archive_read_support_format_rar.c
  14. +1 −0 libarchive/archive_read_support_format_raw.c
  15. +1 −0 libarchive/archive_read_support_format_tar.c
  16. +1 −0 libarchive/archive_read_support_format_xar.c
  17. +2 −0 libarchive/archive_read_support_format_zip.c
  18. +163 −0 libarchive/test/test_rar_multivolume_multiple_files.part1.rar.uu
  19. +163 −0 libarchive/test/test_rar_multivolume_multiple_files.part2.rar.uu
  20. +163 −0 libarchive/test/test_rar_multivolume_multiple_files.part3.rar.uu
  21. +163 −0 libarchive/test/test_rar_multivolume_multiple_files.part4.rar.uu
  22. +163 −0 libarchive/test/test_rar_multivolume_multiple_files.part5.rar.uu
  23. +117 −0 libarchive/test/test_rar_multivolume_multiple_files.part6.rar.uu
  24. +193 −0 libarchive/test/test_read_format_rar.c
  25. +159 −0 libarchive/test/test_splitted_rar_seek_support_aa.uu
  26. +159 −0 libarchive/test/test_splitted_rar_seek_support_ab.uu
  27. +141 −0 libarchive/test/test_splitted_rar_seek_support_ac.uu
View
@@ -492,6 +492,12 @@ libarchive_test_EXTRA_DIST=\
libarchive/test/test_fuzz.cab.uu \
libarchive/test/test_fuzz.lzh.uu \
libarchive/test/test_pax_filename_encoding.tar.uu \
+ libarchive/test/test_rar_multivolume_multiple_files.part1.rar.uu \
+ libarchive/test/test_rar_multivolume_multiple_files.part2.rar.uu \
+ libarchive/test/test_rar_multivolume_multiple_files.part3.rar.uu \
+ libarchive/test/test_rar_multivolume_multiple_files.part4.rar.uu \
+ libarchive/test/test_rar_multivolume_multiple_files.part5.rar.uu \
+ libarchive/test/test_rar_multivolume_multiple_files.part6.rar.uu \
libarchive/test/test_rar_multivolume_single_file.part1.rar.uu \
libarchive/test/test_rar_multivolume_single_file.part2.rar.uu \
libarchive/test/test_rar_multivolume_single_file.part3.rar.uu \
@@ -612,6 +618,9 @@ libarchive_test_EXTRA_DIST=\
libarchive/test/test_read_splitted_rar_ab.uu \
libarchive/test/test_read_splitted_rar_ac.uu \
libarchive/test/test_read_splitted_rar_ad.uu \
+ libarchive/test/test_splitted_rar_seek_support_aa.uu \
+ libarchive/test/test_splitted_rar_seek_support_ab.uu \
+ libarchive/test/test_splitted_rar_seek_support_ac.uu \
libarchive/test/CMakeLists.txt \
libarchive/test/README
View
@@ -440,6 +440,9 @@ __LA_DECL __LA_INT64_T archive_read_header_position(struct archive *);
__LA_DECL __LA_SSIZE_T archive_read_data(struct archive *,
void *, size_t);
+/* Seek within the body of an entry. Similar to lseek(2). */
+__LA_DECL __LA_INT64_T archive_seek_data(struct archive *, __LA_INT64_T, int);
+
/*
* A zero-copy version of archive_read_data that also exposes the file offset
* of each returned block. Note that the client has no way to specify
View
@@ -399,6 +399,8 @@ archive_read_set_callback_data2(struct archive *_a, void *client_data,
return ARCHIVE_FATAL;
}
a->client.dataset[iindex].data = client_data;
+ a->client.dataset[iindex].begin_position = -1;
+ a->client.dataset[iindex].total_size = -1;
return ARCHIVE_OK;
}
@@ -427,8 +429,14 @@ archive_read_add_callback_data(struct archive *_a, void *client_data,
}
a->client.dataset = (struct archive_read_data_node *)p;
for (i = a->client.nodes - 1; i > iindex && i > 0; i--)
+ {
a->client.dataset[i].data = a->client.dataset[i-1].data;
+ a->client.dataset[i].begin_position = -1;
+ a->client.dataset[i].total_size = -1;
+ }
a->client.dataset[iindex].data = client_data;
+ a->client.dataset[iindex].begin_position = -1;
+ a->client.dataset[iindex].total_size = -1;
return ARCHIVE_OK;
}
@@ -495,6 +503,9 @@ archive_read_open1(struct archive *_a)
filter->code = ARCHIVE_COMPRESSION_NONE;
a->filter = filter;
+ client_switch_proxy(a->filter, 0);
+ a->client.dataset[0].begin_position = 0;
+
/* Build out the input pipeline. */
e = choose_filters(a);
if (e < ARCHIVE_WARN) {
@@ -641,6 +652,7 @@ _archive_read_next_header2(struct archive *_a, struct archive_entry *entry)
a->read_data_output_offset = 0;
a->read_data_remaining = 0;
+ a->data_start_node = a->client.cursor;
/* EOF always wins; otherwise return the worst error. */
return (r2 < r1 || r2 == ARCHIVE_EOF) ? r2 : r1;
}
@@ -838,6 +850,23 @@ archive_read_data_skip(struct archive *_a)
return (r);
}
+/*
+ * Seek within the data of the current entry, in the manner of lseek(2).
+ *
+ * The request is handed to the active format's seek_data callback and the
+ * callback's return value is passed back to the caller unchanged.  When the
+ * active format registered no seek_data callback, an error is recorded on
+ * the archive and ARCHIVE_FATAL is returned.
+ *
+ * offset and whence are forwarded to the callback as given.
+ */
+int64_t
+archive_seek_data(struct archive *_a, int64_t offset, int whence)
+{
+ struct archive_read *a = (struct archive_read *)_a;
+ /* The name reported here must be this function's own public name. */
+ archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_DATA,
+ "archive_seek_data");
+
+ /* Formats that cannot seek register NULL for this callback. */
+ if (a->format->seek_data == NULL) {
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
+ "Internal error: "
+ "No format_seek_data function registered");
+ return (ARCHIVE_FATAL);
+ }
+
+ return (a->format->seek_data)(a, offset, whence);
+}
+
/*
* Read the next block of entry data from the archive.
* This is a zero-copy interface; the client receives a pointer,
@@ -1049,6 +1078,7 @@ __archive_read_register_format(struct archive_read *a,
int (*read_header)(struct archive_read *, struct archive_entry *),
int (*read_data)(struct archive_read *, const void **, size_t *, int64_t *),
int (*read_data_skip)(struct archive_read *),
+ int64_t (*seek_data)(struct archive_read *, int64_t, int),
int (*cleanup)(struct archive_read *))
{
int i, number_slots;
@@ -1068,6 +1098,7 @@ __archive_read_register_format(struct archive_read *a,
a->formats[i].read_header = read_header;
a->formats[i].read_data = read_data;
a->formats[i].read_data_skip = read_data_skip;
+ a->formats[i].seek_data = seek_data;
a->formats[i].cleanup = cleanup;
a->formats[i].data = format_data;
a->formats[i].name = name;
@@ -1463,12 +1494,106 @@ int64_t
__archive_read_filter_seek(struct archive_read_filter *filter, int64_t offset, int whence)
{
int64_t r;
+ unsigned int cursor;
if (filter->closed || filter->fatal)
return (ARCHIVE_FATAL);
if (filter->seek == NULL)
return (ARCHIVE_FAILED);
- r = filter->seek(filter, offset, whence);
+
+ switch (whence)
+ {
+ case SEEK_CUR:
+ /* Adjust the offset and use SEEK_SET instead */
+ offset += filter->position;
+ case SEEK_SET:
+ cursor = 0;
+ while (1)
+ {
+ if (filter->archive->client.dataset[cursor].begin_position < 0 ||
+ filter->archive->client.dataset[cursor].total_size < 0 ||
+ filter->archive->client.dataset[cursor].begin_position +
+ filter->archive->client.dataset[cursor].total_size - 1 > offset ||
+ cursor + 1 >= filter->archive->client.nodes)
+ break;
+ r = filter->archive->client.dataset[cursor].begin_position +
+ filter->archive->client.dataset[cursor].total_size;
+ filter->archive->client.dataset[++cursor].begin_position = r;
+ }
+ while (1)
+ {
+ if ((r = client_switch_proxy(filter, cursor)) != (ARCHIVE_OK))
+ return r;
+ if ((r = client_seek_proxy(filter, 0, SEEK_END)) < 0)
+ return r;
+ filter->archive->client.dataset[cursor].total_size = r;
+ if (filter->archive->client.dataset[cursor].begin_position +
+ filter->archive->client.dataset[cursor].total_size - 1 > offset ||
+ cursor + 1 >= filter->archive->client.nodes)
+ break;
+ r = filter->archive->client.dataset[cursor].begin_position +
+ filter->archive->client.dataset[cursor].total_size;
+ filter->archive->client.dataset[++cursor].begin_position = r;
+ }
+ offset -= filter->archive->client.dataset[cursor].begin_position;
+ if (offset < 0)
+ offset = 0;
+ else if (offset > filter->archive->client.dataset[cursor].total_size - 1)
+ offset = filter->archive->client.dataset[cursor].total_size - 1;
+ if ((r = client_seek_proxy(filter, offset, SEEK_SET)) < 0)
+ return r;
+ break;
+
+ case SEEK_END:
+ cursor = 0;
+ while (1)
+ {
+ if (filter->archive->client.dataset[cursor].begin_position < 0 ||
+ filter->archive->client.dataset[cursor].total_size < 0 ||
+ cursor + 1 >= filter->archive->client.nodes)
+ break;
+ r = filter->archive->client.dataset[cursor].begin_position +
+ filter->archive->client.dataset[cursor].total_size;
+ filter->archive->client.dataset[++cursor].begin_position = r;
+ }
+ while (1)
+ {
+ if ((r = client_switch_proxy(filter, cursor)) != (ARCHIVE_OK))
+ return r;
+ if ((r = client_seek_proxy(filter, 0, SEEK_END)) < 0)
+ return r;
+ filter->archive->client.dataset[cursor].total_size = r;
+ r = filter->archive->client.dataset[cursor].begin_position +
+ filter->archive->client.dataset[cursor].total_size;
+ if (cursor + 1 >= filter->archive->client.nodes)
+ break;
+ filter->archive->client.dataset[++cursor].begin_position = r;
+ }
+ while (1)
+ {
+ if (r + offset >=
+ filter->archive->client.dataset[cursor].begin_position)
+ break;
+ offset += filter->archive->client.dataset[cursor].total_size;
+ if (cursor == 0)
+ break;
+ cursor--;
+ r = filter->archive->client.dataset[cursor].begin_position +
+ filter->archive->client.dataset[cursor].total_size;
+ }
+ offset = (r + offset) -
+ filter->archive->client.dataset[cursor].begin_position;
+ if ((r = client_switch_proxy(filter, cursor)) != (ARCHIVE_OK))
+ return r;
+ if ((r = client_seek_proxy(filter, offset, SEEK_SET)) < (ARCHIVE_OK))
+ return r;
+ break;
+
+ default:
+ return (ARCHIVE_FATAL);
+ }
+ r += filter->archive->client.dataset[cursor].begin_position;
+
if (r >= 0) {
/*
* Ouch. Clearing the buffer like this hurts, especially
@@ -123,6 +123,8 @@ struct archive_read_filter {
* so should be deferred at least until libarchive 3.0.
*/
struct archive_read_data_node {
+ int64_t begin_position; /* Offset at which this node starts within the sequence of client data nodes; -1 while not yet known. */
+ int64_t total_size; /* Size of this node as reported by seeking to its end; -1 while not yet known. */
void *data; /* Client data pointer supplied for this node. */
};
struct archive_read_client {
@@ -134,6 +136,7 @@ struct archive_read_client {
archive_switch_callback *switcher;
unsigned int nodes;
unsigned int cursor;
+ int64_t position;
struct archive_read_data_node *dataset;
};
@@ -168,6 +171,10 @@ struct archive_read {
/* File offset of beginning of most recently-read header. */
int64_t header_position;
+ /* Nodes and offsets of compressed data block */
+ unsigned int data_start_node;
+ unsigned int data_end_node;
+
/*
* Format detection is mostly the same as compression
* detection, with one significant difference: The bidders
@@ -185,6 +192,7 @@ struct archive_read {
int (*read_header)(struct archive_read *, struct archive_entry *);
int (*read_data)(struct archive_read *, const void **, size_t *, int64_t *);
int (*read_data_skip)(struct archive_read *);
+ int64_t (*seek_data)(struct archive_read *, int64_t, int);
int (*cleanup)(struct archive_read *);
} formats[16];
struct archive_format_descriptor *format; /* Active format. */
@@ -204,6 +212,7 @@ int __archive_read_register_format(struct archive_read *a,
int (*read_header)(struct archive_read *, struct archive_entry *),
int (*read_data)(struct archive_read *, const void **, size_t *, int64_t *),
int (*read_data_skip)(struct archive_read *),
+ int64_t (*seek_data)(struct archive_read *, int64_t, int),
int (*cleanup)(struct archive_read *));
int __archive_read_get_bidder(struct archive_read *a,
@@ -409,6 +409,7 @@ archive_read_support_format_7zip(struct archive *_a)
archive_read_format_7zip_read_header,
archive_read_format_7zip_read_data,
archive_read_format_7zip_read_data_skip,
+ NULL,
archive_read_format_7zip_cleanup);
if (r != ARCHIVE_OK)
@@ -121,6 +121,7 @@ archive_read_support_format_ar(struct archive *_a)
archive_read_format_ar_read_header,
archive_read_format_ar_read_data,
archive_read_format_ar_skip,
+ NULL,
archive_read_format_ar_cleanup);
if (r != ARCHIVE_OK) {
@@ -382,6 +382,7 @@ archive_read_support_format_cab(struct archive *_a)
archive_read_format_cab_read_header,
archive_read_format_cab_read_data,
archive_read_format_cab_read_data_skip,
+ NULL,
archive_read_format_cab_cleanup);
if (r != ARCHIVE_OK)
@@ -242,6 +242,7 @@ archive_read_support_format_cpio(struct archive *_a)
archive_read_format_cpio_read_header,
archive_read_format_cpio_read_data,
archive_read_format_cpio_skip,
+ NULL,
archive_read_format_cpio_cleanup);
if (r != ARCHIVE_OK)
@@ -53,6 +53,7 @@ archive_read_support_format_empty(struct archive *_a)
archive_read_format_empty_read_header,
archive_read_format_empty_read_data,
NULL,
+ NULL,
NULL);
return (r);
@@ -475,6 +475,7 @@ archive_read_support_format_iso9660(struct archive *_a)
archive_read_format_iso9660_read_header,
archive_read_format_iso9660_read_data,
archive_read_format_iso9660_read_data_skip,
+ NULL,
archive_read_format_iso9660_cleanup);
if (r != ARCHIVE_OK) {
@@ -319,6 +319,7 @@ archive_read_support_format_lha(struct archive *_a)
archive_read_format_lha_read_header,
archive_read_format_lha_read_data,
archive_read_format_lha_read_data_skip,
+ NULL,
archive_read_format_lha_cleanup);
if (r != ARCHIVE_OK)
@@ -202,7 +202,7 @@ archive_read_support_format_mtree(struct archive *_a)
mtree->fd = -1;
r = __archive_read_register_format(a, mtree, "mtree",
- mtree_bid, NULL, read_header, read_data, skip, cleanup);
+ mtree_bid, NULL, read_header, read_data, skip, NULL, cleanup);
if (r != ARCHIVE_OK)
free(mtree);
Oops, something went wrong.

0 comments on commit a5b03cc

Please sign in to comment.