Skip to content

Commit

Permalink
Add support for charset option to cab format reader.
Browse files Browse the repository at this point in the history
SVN-Revision: 3033
  • Loading branch information
ggcueroad committed Mar 20, 2011
1 parent 79a8745 commit bc56f3b
Show file tree
Hide file tree
Showing 6 changed files with 202 additions and 29 deletions.
2 changes: 2 additions & 0 deletions Makefile.am
Expand Up @@ -289,6 +289,7 @@ libarchive_test_SOURCES= \
libarchive/test/test_read_file_nonexistent.c \
libarchive/test/test_read_format_ar.c \
libarchive/test/test_read_format_cab.c \
libarchive/test/test_read_format_cab_filename.c \
libarchive/test/test_read_format_cpio_afio.c \
libarchive/test/test_read_format_cpio_bin.c \
libarchive/test/test_read_format_cpio_bin_Z.c \
Expand Down Expand Up @@ -419,6 +420,7 @@ libarchive_test_EXTRA_DIST=\
libarchive/test/test_read_format_cab_1.cab.uu \
libarchive/test/test_read_format_cab_2.cab.uu \
libarchive/test/test_read_format_cab_3.cab.uu \
libarchive/test/test_read_format_cab_cp932.cab.uu \
libarchive/test/test_read_format_cpio_bin_be.cpio.uu \
libarchive/test/test_read_format_cpio_svr4_bzip2_rpm.rpm.uu \
libarchive/test/test_read_format_cpio_svr4_gzip_rpm.rpm.uu \
Expand Down
97 changes: 69 additions & 28 deletions libarchive/archive_read_support_format_cab.c
@@ -1,5 +1,5 @@
/*-
* Copyright (c) 2010 Michihiro NAKAJIMA
* Copyright (c) 2010-2011 Michihiro NAKAJIMA
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -290,6 +290,7 @@ struct cab {
unsigned char *uncompressed_buffer;
size_t uncompressed_buffer_size;

char *charset;
char format_name[64];

#ifdef HAVE_ZLIB_H
Expand All @@ -300,6 +301,8 @@ struct cab {
};

static int archive_read_format_cab_bid(struct archive_read *);
static int archive_read_format_cab_options(struct archive_read *,
const char *, const char *);
static int archive_read_format_cab_read_header(struct archive_read *,
struct archive_entry *);
static int archive_read_format_cab_read_data(struct archive_read *,
Expand Down Expand Up @@ -362,7 +365,7 @@ archive_read_support_format_cab(struct archive *_a)
cab,
"cab",
archive_read_format_cab_bid,
NULL,
archive_read_format_cab_options,
archive_read_format_cab_read_header,
archive_read_format_cab_read_data,
archive_read_format_cab_read_data_skip,
Expand Down Expand Up @@ -443,6 +446,29 @@ archive_read_format_cab_bid(struct archive_read *a)
return (0);
}

/*
 * Handle a reader option addressed to the CAB format.
 *
 * The only key recognized here is "charset"; its value is duplicated and
 * stored in cab->charset.
 *
 * Returns ARCHIVE_OK when the charset was stored.  Returns ARCHIVE_FAILED,
 * with an error message set on the archive object, when the key is not
 * "charset", when the value is missing or empty, or when the copy of the
 * value cannot be allocated.  On any failure the previously stored
 * charset (if any) is left untouched.
 */
static int
archive_read_format_cab_options(struct archive_read *a,
    const char *key, const char *val)
{
	struct cab *cab;
	char *charset;

	cab = (struct cab *)(a->format->data);

	if (strcmp(key, "charset") != 0) {
		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
		    "cab: unknown keyword ``%s''", key);
		return (ARCHIVE_FAILED);
	}

	if (val == NULL || val[0] == '\0') {
		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
		    "cab: charset option needs a character-set name");
		return (ARCHIVE_FAILED);
	}

	/*
	 * Duplicate into a temporary first: if the allocation fails we must
	 * report it rather than silently store NULL and claim success.
	 */
	charset = strdup(val);
	if (charset == NULL) {
		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
		    "cab: can't allocate memory for character-set name");
		return (ARCHIVE_FAILED);
	}

	/*
	 * The option may be given more than once; release the name stored
	 * by an earlier call so it is not leaked.  free(NULL) is a no-op.
	 */
	free(cab->charset);
	cab->charset = charset;

	return (ARCHIVE_OK);
}

static int
cab_skip_sfx(struct archive_read *a)
{
Expand Down Expand Up @@ -526,38 +552,49 @@ cab_read_ahead_remaining(struct archive_read *a, size_t min, ssize_t *avail)
/* Convert a path separator '\' -> '/' */
static void
cab_convert_path_separator(struct archive_read *a, struct cab *cab,
struct archive_string *pathname, unsigned char attr)
struct archive_string *fn, unsigned char attr)
{
int l, r;
size_t i;

if (strchr(pathname->s, '\\') == NULL)
for (i = 0; i < archive_strlen(fn); i++) {
if (fn->s[i] == '\\')
fn->s[i] = '/';
else if (fn->s[i] & 0x80)
/* Does fn contain any multibyte characters? */
break;
}
if (i == archive_strlen(fn))
return;

archive_wstring_empty(&cab->ws);
if ((attr & ATTR_NAME_IS_UTF) != 0 ||
archive_wstring_append_from_mbs(&a->archive, &(cab->ws), pathname->s, pathname->length) != 0) {
for (l = 0; pathname->s[l] != '\0'; l++) {
if (pathname->s[l] == '\\')
pathname->s[l] = '/';
/*
* Try to replace a character in wide character.
*/

/* If a conversion to wide character failed, force a replacement. */
if (!archive_wstring_append_from_mbs(&a->archive, &(cab->ws),
fn->s, fn->length)) {
for (i = 0; i < archive_strlen(fn); i++) {
if (fn->s[i] == '\\')
fn->s[i] = '/';
}
return;
}

r = 0;
for (l = 0; cab->ws.s[l] != L'\0'; l++) {
if (cab->ws.s[l] == L'\\') {
cab->ws.s[l] = L'/';
r = 1;
}
}
if (r) {
archive_string_empty(&cab->mbs);
archive_string_append_from_unicode_to_mbs(&a->archive, &cab->mbs, cab->ws.s, cab->ws.length);
/* If mbs length is different to pathname, we broke the
* pathname. We shouldn't use it. */
if (archive_strlen(&cab->mbs) == archive_strlen(pathname))
archive_string_copy(pathname, &cab->mbs);
for (i = 0; i < archive_strlen(&(cab->ws)); i++) {
if (cab->ws.s[i] == L'\\')
cab->ws.s[i] = L'/';
}

/*
* Sanity check: make sure the replacement did not break the filename.
*/
archive_string_empty(&(cab->mbs));
archive_string_append_from_unicode_to_mbs(&a->archive, &(cab->mbs),
cab->ws.s, cab->ws.length);
/* If the length of mbs differs from that of fn, the conversion
* broke the filename, so we must not use it. */
if (archive_strlen(&(cab->mbs)) == archive_strlen(fn))
archive_string_copy(fn, &(cab->mbs));
}

/*
Expand Down Expand Up @@ -753,7 +790,12 @@ cab_read_header(struct archive_read *a)
if ((len = cab_strnlen(p, avail-1)) <= 0)
goto invalid;
archive_string_init(&(file->pathname));
archive_strncpy(&(file->pathname), p, len);
if ((file->attr & ATTR_NAME_IS_UTF) && cab->charset == NULL)
archive_strncpy_from_specific_locale(&a->archive,
&(file->pathname), (const char *)p, len, "UTF-8");
else
archive_strncpy_from_specific_locale(&a->archive,
&(file->pathname), (const char *)p, len, cab->charset);
__archive_read_consume(a, len + 1);
cab->cab_offset += len + 1;
/* Convert a path separator '\' -> '/' */
Expand Down Expand Up @@ -885,8 +927,6 @@ archive_read_format_cab_read_header(struct archive_read *a,
* Set a default value and common data
*/
archive_entry_set_pathname(entry, file->pathname.s);
if (file->attr & ATTR_NAME_IS_UTF)
archive_entry_update_pathname_utf8(entry, file->pathname.s);

archive_entry_set_size(entry, file->uncompressed_size);
if (file->attr & ATTR_RDONLY)
Expand Down Expand Up @@ -1928,6 +1968,7 @@ archive_read_format_cab_cleanup(struct archive_read *a)
archive_wstring_free(&cab->ws);
archive_string_free(&cab->mbs);
free(cab->uncompressed_buffer);
free(cab->charset);
free(cab);
(a->format->data) = NULL;
return (ARCHIVE_OK);
Expand Down
2 changes: 1 addition & 1 deletion libarchive/archive_read_support_format_lha.c
Expand Up @@ -730,7 +730,7 @@ lha_replace_path_separator(struct archive_read *a, struct lha *lha,
* Try to replace a character in wide character.
*/

/* If converting to wide character failed, force a replacement. */
/* If a conversion to wide character failed, force a replacement. */
if (!archive_wstring_append_from_mbs(&a->archive, &(lha->ws),
fn->s, fn->length)) {
for (i = 0; i < archive_strlen(fn); i++) {
Expand Down
1 change: 1 addition & 0 deletions libarchive/test/CMakeLists.txt
Expand Up @@ -60,6 +60,7 @@ IF(ENABLE_TEST)
test_read_file_nonexistent.c
test_read_format_ar.c
test_read_format_cab.c
test_read_format_cab_filename.c
test_read_format_cpio_afio.c
test_read_format_cpio_bin.c
test_read_format_cpio_bin_Z.c
Expand Down
7 changes: 7 additions & 0 deletions libarchive/test/test_read_format_cab_cp932.cab.uu
@@ -0,0 +1,7 @@
begin 644 test_read_format_cab_cp932.cab
M35-#1@````"4`````````"P``````````P$!``(````(_@``;@````$``Q(%
M````````````=#ZO5"``E5R"OH+F7(J_CIHN='AT``4````%``````!T/KM4
M(`"57(*^@N9<B.J7EY5<+G1X=``I]2&+'@`*`%N`@(T`,*```0````$````!
-````:V%N:FE(96QL;P``
`
end
122 changes: 122 additions & 0 deletions libarchive/test/test_read_format_cab_filename.c
@@ -0,0 +1,122 @@
/*-
* Copyright (c) 2011 Michihiro NAKAJIMA
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "test.h"
__FBSDID("$FreeBSD");

#include <locale.h>

/*
 * Check that the "charset=CP932" read option makes the pathnames stored
 * in test_read_format_cab_cp932.cab come out in the encoding of the
 * current locale.  The same archive is read twice: once under the
 * ja_JP.eucJP locale and once under ja_JP.UTF-8.  Each pass expects two
 * entries of 5 bytes each, then end-of-archive, and verifies that the
 * archive is reported as an uncompressed CAB.  A pass is skipped (and the
 * test returns) when its locale is not available.
 */
DEFINE_TEST(test_read_format_cab_filename)
{
struct archive *a;
struct archive_entry *ae;
const char *refname = "test_read_format_cab_cp932.cab";

/*
 * Pass 1: read the CAB filenames under the ja_JP.eucJP locale with
 * the "charset=CP932" option.
 */
if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
skipping("ja_JP.eucJP locale not available on this system.");
return;
}

/* Extract the reference archive; the second pass below reuses it. */
extract_reference_file(refname);
assert((a = archive_read_new()) != NULL);
assertEqualIntA(a, ARCHIVE_OK, archive_read_support_compression_all(a));
assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
/* The option must be set before the archive is opened. */
assertEqualIntA(a, ARCHIVE_OK,
archive_read_set_options(a, "charset=CP932"));
assertEqualIntA(a, ARCHIVE_OK,
archive_read_open_filename(a, refname, 10240));

/* First entry: expected pathname bytes for this locale, size 5. */
assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
assertEqualString(
"\xc9\xbd\xa4\xc0\xa4\xe8\x2f\xb4\xc1\xbb\xfa\x2e\x74\x78\x74",
archive_entry_pathname(ae));
assertEqualInt(5, archive_entry_size(ae));

/* Second entry: expected pathname bytes for this locale, size 5. */
assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
assertEqualString(
"\xc9\xbd\xa4\xc0\xa4\xe8\x2f\xb0\xec\xcd\xf7\xc9\xbd\x2e\x74\x78\x74",
archive_entry_pathname(ae));
assertEqualInt(5, archive_entry_size(ae));


/* End of archive: no third entry is expected. */
assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));

/* Verify the detected compression and archive format. */
assertEqualIntA(a, ARCHIVE_COMPRESSION_NONE, archive_compression(a));
assertEqualIntA(a, ARCHIVE_FORMAT_CAB, archive_format(a));

/* Close the archive and release the reader. */
assertEqualInt(ARCHIVE_OK, archive_read_close(a));
assertEqualInt(ARCHIVE_OK, archive_read_free(a));


/*
 * Pass 2: read the same CAB filenames under the ja_JP.UTF-8 locale
 * with the "charset=CP932" option.
 */
if (NULL == setlocale(LC_ALL, "ja_JP.UTF-8")) {
skipping("ja_JP.UTF-8 locale not available on this system.");
return;
}

/* A fresh reader is created; the already-extracted file is reopened. */
assert((a = archive_read_new()) != NULL);
assertEqualIntA(a, ARCHIVE_OK, archive_read_support_compression_all(a));
assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
assertEqualIntA(a, ARCHIVE_OK,
archive_read_set_options(a, "charset=CP932"));
assertEqualIntA(a, ARCHIVE_OK,
archive_read_open_filename(a, refname, 10240));

/* First entry: expected pathname bytes for this locale, size 5. */
assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
assertEqualString("\xe8\xa1\xa8\xe3\x81\xa0\xe3\x82\x88\x2f"
"\xe6\xbc\xa2\xe5\xad\x97\x2e\x74\x78\x74",
archive_entry_pathname(ae));
assertEqualInt(5, archive_entry_size(ae));

/* Second entry: expected pathname bytes for this locale, size 5. */
assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
assertEqualString("\xe8\xa1\xa8\xe3\x81\xa0\xe3\x82\x88\x2f"
"\xe4\xb8\x80\xe8\xa6\xa7\xe8\xa1\xa8\x2e\x74\x78\x74",
archive_entry_pathname(ae));
assertEqualInt(5, archive_entry_size(ae));


/* End of archive: no third entry is expected. */
assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));

/* Verify the detected compression and archive format. */
assertEqualIntA(a, ARCHIVE_COMPRESSION_NONE, archive_compression(a));
assertEqualIntA(a, ARCHIVE_FORMAT_CAB, archive_format(a));

/* Close the archive and release the reader. */
assertEqualInt(ARCHIVE_OK, archive_read_close(a));
assertEqualInt(ARCHIVE_OK, archive_read_free(a));
}

0 comments on commit bc56f3b

Please sign in to comment.