Skip to content

Commit

Permalink
add X-Archive-Charset and Unicode Path extra field
Browse files Browse the repository at this point in the history
  • Loading branch information
tony2001 committed Nov 14, 2010
1 parent 0eac32a commit 46efeae
Show file tree
Hide file tree
Showing 5 changed files with 162 additions and 32 deletions.
125 changes: 122 additions & 3 deletions ngx_http_zip_file.c
Expand Up @@ -3,6 +3,8 @@
#include "ngx_http_zip_file.h" #include "ngx_http_zip_file.h"
#include "ngx_http_zip_file_format.h" #include "ngx_http_zip_file_format.h"


#include <iconv.h>
/* Variable name handed to ngx_http_upstream_header_variable() to read the
 * charset that archive file names should be converted to. */
static ngx_str_t ngx_http_zip_header_charset_name = ngx_string("upstream_http_x_archive_charset");


#define NGX_MAX_UINT16_VALUE 0xffff #define NGX_MAX_UINT16_VALUE 0xffff


Expand All @@ -26,6 +28,13 @@ static ngx_zip_extra_field_central_t ngx_zip_extra_field_central_template = {
0, /* modification time */ 0, /* modification time */
}; };


/* Template for the Info-ZIP Unicode Path extra field header. Users copy it
 * and then fill in the size and crc-32 for the individual file. */
static ngx_zip_extra_field_unicode_path_t ngx_zip_extra_field_unicode_path_template = {
0x7075, /* Info-ZIP Unicode Path tag */
0, /* size: filled in per file, depends on the UTF-8 name length */
1, /* version of this extra field, currently 1 (c) */
0, /* crc-32 */
};

static ngx_zip_extra_field_zip64_sizes_only_t ngx_zip_extra_field_zip64_sizes_only_template = { static ngx_zip_extra_field_zip64_sizes_only_t ngx_zip_extra_field_zip64_sizes_only_template = {
0x0001, //tag for zip64 extra field 0x0001, //tag for zip64 extra field
sizeof(ngx_zip_extra_field_zip64_sizes_only_t) - 4, sizeof(ngx_zip_extra_field_zip64_sizes_only_t) - 4,
Expand Down Expand Up @@ -155,6 +164,9 @@ ngx_http_zip_truncate_buffer(ngx_buf_t *b,
} }
} }


/* Fallback for when ICONV_CSNMAXLEN is not already defined; the value sizes
 * the buffer that holds the iconv target encoding name. */
#ifndef ICONV_CSNMAXLEN
#define ICONV_CSNMAXLEN 64
#endif


// make our proposed ZIP-file chunk map // make our proposed ZIP-file chunk map
ngx_int_t ngx_int_t
Expand All @@ -164,6 +176,33 @@ ngx_http_zip_generate_pieces(ngx_http_request_t *r, ngx_http_zip_ctx_t *ctx)
off_t offset = 0; off_t offset = 0;
ngx_http_zip_file_t *file; ngx_http_zip_file_t *file;
ngx_http_zip_piece_t *header_piece, *file_piece, *trailer_piece, *cd_piece; ngx_http_zip_piece_t *header_piece, *file_piece, *trailer_piece, *cd_piece;
ngx_http_variable_value_t *vv;
iconv_t *iconv_cd;

if ((vv = ngx_palloc(r->pool, sizeof(ngx_http_variable_value_t))) == NULL)
return NGX_ERROR;

if (ngx_http_upstream_header_variable(r, vv, (uintptr_t)(&ngx_http_zip_header_charset_name)) != NGX_OK
|| vv->not_found || ngx_strncmp(vv->data, "utf8", sizeof("utf8") - 1) == 0) {
iconv_cd = NULL;
} else {
char encoding[ICONV_CSNMAXLEN];
snprintf(encoding, sizeof(encoding), "%s//TRANSLIT//IGNORE", vv->data);

iconv_cd = iconv_open((const char *)encoding, "utf-8");
if (iconv_cd == (iconv_t)(-1)) {
ngx_log_error(NGX_LOG_WARN, r->connection->log, errno,
"mod_zip: iconv_open('%s', 'utf-8') failed",
vv->data);
iconv_cd = NULL;
}
}

if (iconv_cd) {
ctx->unicode_path = 1;
} else {
ctx->unicode_path = 0;
}


// pieces: for each file: header, data, footer (if needed) -> 2 or 3 per file // pieces: for each file: header, data, footer (if needed) -> 2 or 3 per file
// plus file footer (CD + [zip64 end + zip64 locator +] end of cd) in one chunk // plus file footer (CD + [zip64 end + zip64 locator +] end of cd) in one chunk
Expand All @@ -177,6 +216,42 @@ ngx_http_zip_generate_pieces(ngx_http_request_t *r, ngx_http_zip_ctx_t *ctx)
file = &((ngx_http_zip_file_t *)ctx->files.elts)[i]; file = &((ngx_http_zip_file_t *)ctx->files.elts)[i];
file->offset = offset; file->offset = offset;


if (ctx->unicode_path) {
    /*
     * Keep the original UTF-8 name for the Unicode Path extra field and
     * replace file->filename with its iconv-converted form, which is what
     * ends up in the regular file name field of the headers.
     */
    size_t inlen = file->filename.len, outlen, outleft;
    u_char *p, *in;

    //inbuf: private, NUL-terminated copy of the UTF-8 name
    file->filename_utf8.data = ngx_pnalloc(r->pool, file->filename.len + 1);
    if (file->filename_utf8.data == NULL) {
        iconv_close(iconv_cd);
        return NGX_ERROR;
    }
    ngx_memcpy(file->filename_utf8.data, file->filename.data, file->filename.len);
    file->filename_utf8.len = file->filename.len;
    file->filename_utf8.data[file->filename.len] = '\0';

    //outbuf: several output bytes per input byte plus some slack
    outlen = outleft = inlen * sizeof(int) + 15;
    p = ngx_pnalloc(r->pool, outlen + 1);
    if (p == NULL) {
        iconv_close(iconv_cd);
        return NGX_ERROR;
    }
    file->filename.data = p;

    in = file->filename_utf8.data;

    //reset state
    iconv(iconv_cd, NULL, NULL, NULL, NULL);

    //convert the string; on failure keep whatever was converted, but say so
    if (iconv(iconv_cd, (char **)&in, &inlen, (char **)&p, &outleft) == (size_t) -1) {
        ngx_log_error(NGX_LOG_WARN, r->connection->log, errno,
                      "mod_zip: iconv() could not fully convert file name \"%V\"",
                      &file->filename_utf8);
    }

    file->filename.len = outlen - outleft;

    /*
     * NameCRC32 of the Unicode Path extra field is the standard ZIP CRC-32
     * of the header file name field (the converted name), which readers use
     * to check that the extra field still belongs to that name.
     */
    file->filename_utf8_crc32 = ngx_crc32_long(file->filename.data, file->filename.len);
}

if(offset >= (off_t) NGX_MAX_UINT32_VALUE) if(offset >= (off_t) NGX_MAX_UINT32_VALUE)
ctx->zip64_used = file->need_zip64_offset = 1; ctx->zip64_used = file->need_zip64_offset = 1;
if(file->size >= (off_t) NGX_MAX_UINT32_VALUE) if(file->size >= (off_t) NGX_MAX_UINT32_VALUE)
Expand All @@ -185,15 +260,17 @@ ngx_http_zip_generate_pieces(ngx_http_request_t *r, ngx_http_zip_ctx_t *ctx)
/*
 * Size of this file's central directory entry: fixed header, file name and
 * timestamp extra field, plus whichever zip64 extra field applies, plus the
 * Unicode Path extra field. The Unicode Path term is kept outside the zip64
 * conditional so that it is counted for every zip64 combination.
 */
ctx->cd_size += sizeof(ngx_zip_central_directory_file_header_t) + file->filename.len + sizeof(ngx_zip_extra_field_central_t)
    + (file->need_zip64_offset ?
        (file->need_zip64 ? sizeof(ngx_zip_extra_field_zip64_sizes_offset_t) : sizeof(ngx_zip_extra_field_zip64_offset_only_t)) :
        (file->need_zip64 ? sizeof(ngx_zip_extra_field_zip64_sizes_only_t) : 0))
    + (ctx->unicode_path ? (sizeof(ngx_zip_extra_field_unicode_path_t) + file->filename_utf8.len) : 0);


header_piece = &ctx->pieces[piece_i++]; header_piece = &ctx->pieces[piece_i++];
header_piece->type = zip_header_piece; header_piece->type = zip_header_piece;
header_piece->file = file; header_piece->file = file;
header_piece->range.start = offset; header_piece->range.start = offset;
header_piece->range.end = offset += sizeof(ngx_zip_local_file_header_t) header_piece->range.end = offset += sizeof(ngx_zip_local_file_header_t)
+ file->filename.len + sizeof(ngx_zip_extra_field_local_t) + (file->need_zip64? sizeof(ngx_zip_extra_field_zip64_sizes_only_t):0); + file->filename.len + sizeof(ngx_zip_extra_field_local_t) + (file->need_zip64? sizeof(ngx_zip_extra_field_zip64_sizes_only_t):0)
+ (ctx->unicode_path ? (sizeof(ngx_zip_extra_field_unicode_path_t) + file->filename_utf8.len): 0);


file_piece = &ctx->pieces[piece_i++]; file_piece = &ctx->pieces[piece_i++];
file_piece->type = zip_file_piece; file_piece->type = zip_file_piece;
Expand All @@ -212,6 +289,10 @@ ngx_http_zip_generate_pieces(ngx_http_request_t *r, ngx_http_zip_ctx_t *ctx)
} }
} }


// unicode_path is only set when iconv_open() succeeded, so there is a descriptor to release
if (ctx->unicode_path) {
iconv_close(iconv_cd);
}

ctx->zip64_used |= offset >= (off_t) NGX_MAX_UINT32_VALUE || ctx->files.nelts >= NGX_MAX_UINT16_VALUE; ctx->zip64_used |= offset >= (off_t) NGX_MAX_UINT32_VALUE || ctx->files.nelts >= NGX_MAX_UINT16_VALUE;


ctx->cd_size += sizeof(ngx_zip_end_of_central_directory_record_t); ctx->cd_size += sizeof(ngx_zip_end_of_central_directory_record_t);
Expand Down Expand Up @@ -243,9 +324,11 @@ ngx_http_zip_file_header_chain_link(ngx_http_request_t *r, ngx_http_zip_ctx_t *c
ngx_zip_extra_field_local_t extra_field_local; ngx_zip_extra_field_local_t extra_field_local;
ngx_zip_extra_field_zip64_sizes_only_t extra_field_zip64; ngx_zip_extra_field_zip64_sizes_only_t extra_field_zip64;
ngx_zip_local_file_header_t local_file_header; ngx_zip_local_file_header_t local_file_header;
ngx_zip_extra_field_unicode_path_t extra_field_unicode_path;


size_t len = sizeof(ngx_zip_local_file_header_t) + file->filename.len size_t len = sizeof(ngx_zip_local_file_header_t) + file->filename.len
+ sizeof(ngx_zip_extra_field_local_t) + (file->need_zip64? sizeof(ngx_zip_extra_field_zip64_sizes_only_t):0); + sizeof(ngx_zip_extra_field_local_t) + (file->need_zip64? sizeof(ngx_zip_extra_field_zip64_sizes_only_t):0
+ (ctx->unicode_path ? (sizeof(ngx_zip_extra_field_unicode_path_t) + file->filename_utf8.len): 0));


if ((link = ngx_alloc_chain_link(r->pool)) == NULL || (b = ngx_calloc_buf(r->pool)) == NULL if ((link = ngx_alloc_chain_link(r->pool)) == NULL || (b = ngx_calloc_buf(r->pool)) == NULL
|| (b->pos = ngx_pcalloc(r->pool, len)) == NULL) || (b->pos = ngx_pcalloc(r->pool, len)) == NULL)
Expand All @@ -266,6 +349,9 @@ ngx_http_zip_file_header_chain_link(ngx_http_request_t *r, ngx_http_zip_ctx_t *c
local_file_header = ngx_zip_local_file_header_template; local_file_header = ngx_zip_local_file_header_template;
local_file_header.mtime = file->dos_time; local_file_header.mtime = file->dos_time;
local_file_header.filename_len = file->filename.len; local_file_header.filename_len = file->filename.len;
if (ctx->unicode_path) {
local_file_header.flags &= ~zip_utf8_flag;
}
if (file->need_zip64) { if (file->need_zip64) {
local_file_header.version = zip_version_zip64; local_file_header.version = zip_version_zip64;
local_file_header.extra_field_len = sizeof(ngx_zip_extra_field_zip64_sizes_only_t) + sizeof(ngx_zip_extra_field_local_t); local_file_header.extra_field_len = sizeof(ngx_zip_extra_field_zip64_sizes_only_t) + sizeof(ngx_zip_extra_field_local_t);
Expand All @@ -276,6 +362,14 @@ ngx_http_zip_file_header_chain_link(ngx_http_request_t *r, ngx_http_zip_ctx_t *c
local_file_header.uncompressed_size = file->size; local_file_header.uncompressed_size = file->size;
} }


if (ctx->unicode_path) {
extra_field_unicode_path = ngx_zip_extra_field_unicode_path_template;
extra_field_unicode_path.crc32 = file->filename_utf8_crc32;
extra_field_unicode_path.size = sizeof(ngx_zip_extra_field_unicode_path_t) + file->filename_utf8.len - 4;

local_file_header.extra_field_len += sizeof(ngx_zip_extra_field_unicode_path_t) + file->filename_utf8.len;
}

if (!file->missing_crc32) { if (!file->missing_crc32) {
local_file_header.flags &= ~zip_missing_crc32_flag; local_file_header.flags &= ~zip_missing_crc32_flag;
local_file_header.crc32 = file->crc32; local_file_header.crc32 = file->crc32;
Expand All @@ -292,6 +386,11 @@ ngx_http_zip_file_header_chain_link(ngx_http_request_t *r, ngx_http_zip_ctx_t *c
if (file->need_zip64) if (file->need_zip64)
ngx_memcpy(b->pos + sizeof(ngx_zip_local_file_header_t) + file->filename.len + sizeof(ngx_zip_extra_field_local_t), ngx_memcpy(b->pos + sizeof(ngx_zip_local_file_header_t) + file->filename.len + sizeof(ngx_zip_extra_field_local_t),
&extra_field_zip64, sizeof(ngx_zip_extra_field_zip64_sizes_only_t)); &extra_field_zip64, sizeof(ngx_zip_extra_field_zip64_sizes_only_t));

if (ctx->unicode_path) {
    /*
     * The Unicode Path extra field goes right after the local extra field
     * and, only when one was written, the zip64 extra field. Skipping the
     * zip64 size unconditionally would misplace the field and write past
     * the end of the buffer for files without zip64 data.
     */
    u_char *dst = b->pos + sizeof(ngx_zip_local_file_header_t) + file->filename.len
        + sizeof(ngx_zip_extra_field_local_t)
        + (file->need_zip64 ? sizeof(ngx_zip_extra_field_zip64_sizes_only_t) : 0);

    ngx_memcpy(dst, &extra_field_unicode_path, sizeof(ngx_zip_extra_field_unicode_path_t));
    ngx_memcpy(dst + sizeof(ngx_zip_extra_field_unicode_path_t), file->filename_utf8.data, file->filename_utf8.len);
}


ngx_http_zip_truncate_buffer(b, &piece->range, range); ngx_http_zip_truncate_buffer(b, &piece->range, range);


Expand Down Expand Up @@ -420,13 +519,18 @@ ngx_http_zip_write_central_directory_entry(u_char *p, ngx_http_zip_file_t *file,
ngx_zip_extra_field_zip64_offset_only_t extra_zip64_offset; ngx_zip_extra_field_zip64_offset_only_t extra_zip64_offset;
ngx_zip_extra_field_zip64_sizes_offset_t extra_zip64_offset_size; ngx_zip_extra_field_zip64_sizes_offset_t extra_zip64_offset_size;
ngx_zip_extra_field_zip64_sizes_only_t extra_zip64_size; ngx_zip_extra_field_zip64_sizes_only_t extra_zip64_size;
ngx_zip_extra_field_unicode_path_t extra_field_unicode_path;
void* extra_zip64_ptr = NULL; //!! void* extra_zip64_ptr = NULL; //!!
size_t extra_zip64_ptr_size = 0; size_t extra_zip64_ptr_size = 0;


central_directory_file_header = ngx_zip_central_directory_file_header_template; central_directory_file_header = ngx_zip_central_directory_file_header_template;
central_directory_file_header.mtime = file->dos_time; central_directory_file_header.mtime = file->dos_time;
central_directory_file_header.crc32 = file->crc32; central_directory_file_header.crc32 = file->crc32;


if (ctx->unicode_path) {
central_directory_file_header.flags &= ~zip_utf8_flag;
}

if (!file->need_zip64) { if (!file->need_zip64) {
central_directory_file_header.compressed_size = file->size; central_directory_file_header.compressed_size = file->size;
central_directory_file_header.uncompressed_size = file->size; central_directory_file_header.uncompressed_size = file->size;
Expand Down Expand Up @@ -463,6 +567,14 @@ ngx_http_zip_write_central_directory_entry(u_char *p, ngx_http_zip_file_t *file,
extra_field_central = ngx_zip_extra_field_central_template; extra_field_central = ngx_zip_extra_field_central_template;
extra_field_central.mtime = file->unix_time; extra_field_central.mtime = file->unix_time;


if (ctx->unicode_path) {
extra_field_unicode_path = ngx_zip_extra_field_unicode_path_template;
extra_field_unicode_path.crc32 = file->filename_utf8_crc32;
extra_field_unicode_path.size = sizeof(ngx_zip_extra_field_unicode_path_t) + file->filename_utf8.len - 4;

central_directory_file_header.extra_field_len += sizeof(ngx_zip_extra_field_unicode_path_t) + file->filename_utf8.len;
}

ngx_memcpy(p, &central_directory_file_header, sizeof(ngx_zip_central_directory_file_header_t)); ngx_memcpy(p, &central_directory_file_header, sizeof(ngx_zip_central_directory_file_header_t));
p += sizeof(ngx_zip_central_directory_file_header_t); p += sizeof(ngx_zip_central_directory_file_header_t);


Expand All @@ -477,5 +589,12 @@ ngx_http_zip_write_central_directory_entry(u_char *p, ngx_http_zip_file_t *file,
p += extra_zip64_ptr_size; p += extra_zip64_ptr_size;
} }


if (ctx->unicode_path) {
ngx_memcpy(p, &extra_field_unicode_path, sizeof(ngx_zip_extra_field_unicode_path_t));
p += sizeof(ngx_zip_extra_field_unicode_path_t);

ngx_memcpy(p, file->filename_utf8.data, file->filename_utf8.len);
p += file->filename_utf8.len;
}
return p; return p;
} }
7 changes: 7 additions & 0 deletions ngx_http_zip_file_format.h
Expand Up @@ -25,6 +25,13 @@ typedef struct {
uint32_t mtime; uint32_t mtime;
} ngx_zip_extra_field_central_t; } ngx_zip_extra_field_central_t;


// Header of the Info-ZIP Unicode Path extra field; the UTF-8 file name bytes are written right after it.
// NOTE(review): sizeof() of this struct is used as the on-wire header length, so it presumably
// relies on byte packing being in effect for this header (uint8_t followed by uint32_t) — confirm.
typedef struct {
uint16_t tag; //0x7075
uint16_t size;
uint8_t version; //1
uint32_t crc32;
} ngx_zip_extra_field_unicode_path_t;

typedef struct { // not entirely writen... typedef struct { // not entirely writen...
uint16_t tag; //0x0001 uint16_t tag; //0x0001
uint16_t size; // size of this record (32) uint16_t size; // size of this record (32)
Expand Down
3 changes: 3 additions & 0 deletions ngx_http_zip_module.h
Expand Up @@ -18,6 +18,8 @@ typedef struct {
ngx_uint_t dos_time; ngx_uint_t dos_time;
ngx_uint_t unix_time; ngx_uint_t unix_time;
ngx_str_t filename; ngx_str_t filename;
ngx_str_t filename_utf8;
uint32_t filename_utf8_crc32;
off_t size; off_t size;
off_t offset; off_t offset;


Expand Down Expand Up @@ -70,6 +72,7 @@ typedef struct {
unsigned abort:1; unsigned abort:1;
unsigned missing_crc32:1; // used in subrequest, if true = reads file into memory and calculates it; also to indicate presence of such file unsigned missing_crc32:1; // used in subrequest, if true = reads file into memory and calculates it; also to indicate presence of such file
unsigned zip64_used:1; unsigned zip64_used:1;
unsigned unicode_path:1;
} ngx_http_zip_ctx_t; } ngx_http_zip_ctx_t;


typedef struct { typedef struct {
Expand Down

0 comments on commit 46efeae

Please sign in to comment.