From c3e8d0a06554ab13800bfb510b19c19ae6bd9e29 Mon Sep 17 00:00:00 2001 From: Mrmaxmeier Date: Sun, 4 Feb 2018 00:27:33 +0100 Subject: [PATCH 1/9] replace pi const with M_PI --- tectonic/xetex0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tectonic/xetex0.c b/tectonic/xetex0.c index afbc33ea0..a3483cf72 100644 --- a/tectonic/xetex0.c +++ b/tectonic/xetex0.c @@ -17189,7 +17189,7 @@ void load_picture(bool is_pdf) y_size_req = 0.0; transform_concat(&t, &t2); } - make_rotation(&t2, Fix2D(cur_val) * 3.141592653589793 / ((double)180.0)); + make_rotation(&t2, Fix2D(cur_val) * M_PI / ((double)180.0)); { register int32_t for_end; i = 0; From 2d4da5c7250a2346d4e6b55d116fa090aaa6224f Mon Sep 17 00:00:00 2001 From: Mrmaxmeier Date: Sun, 4 Feb 2018 00:28:35 +0100 Subject: [PATCH 2/9] fix off-by-one (uninitialized read) looks to be introduced by bcdc052a. --- tectonic/xetexini.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tectonic/xetexini.c b/tectonic/xetexini.c index f0c220de5..fbf59b9df 100644 --- a/tectonic/xetexini.c +++ b/tectonic/xetexini.c @@ -3996,7 +3996,7 @@ tt_run_engine(char *dump_name, char *input_file_name) TEX_format_default = xmalloc (len + 2); TEX_format_default[0] = ' '; strcpy (TEX_format_default + 1, dump_name); - format_default_length = len + 2; + format_default_length = len + 1; /* Not sure why these get custom initializations. */ From b6105603f5432f952830fc253bea5638fe94f8bc Mon Sep 17 00:00:00 2001 From: Mrmaxmeier Date: Sat, 10 Feb 2018 19:46:17 +0100 Subject: [PATCH 3/9] cleanup name_of_file --- tectonic/XeTeX_ext.c | 52 +++++++++++------------ tectonic/XeTeX_ext.h | 2 +- tectonic/XeTeX_pic.c | 11 +++-- tectonic/io.c | 11 ++--- tectonic/xetex-shipout.c | 8 ++-- tectonic/xetex0.c | 18 ++++---- tectonic/xetexd.h | 2 +- tectonic/xetexini.c | 89 +++++----------------------------------- 8 files changed, 62 insertions(+), 131 deletions(-) diff --git a/tectonic/XeTeX_ext.c b/tectonic/XeTeX_ext.c index 8b83845a1..9ff2eadb8 100644 --- a/tectonic/XeTeX_ext.c +++ b/tectonic/XeTeX_ext.c @@ -133,40 +133,39 @@ get_encoding_mode_and_info(int32_t* info) */ UErrorCode err = U_ZERO_ERROR; UConverter* cnv; - char* name = (char*)name_of_file + 1; *info = 0; - if (strcasecmp(name, "auto") == 0) { + if (strcasecmp(name_of_file, "auto") == 0) { return AUTO; } - if (strcasecmp(name, "utf8") == 0) { + if (strcasecmp(name_of_file, "utf8") == 0) { return UTF8; } - if (strcasecmp(name, "utf16") == 0) { /* depends on host platform */ + if (strcasecmp(name_of_file, "utf16") == 0) { /* depends on host platform */ return US_NATIVE_UTF16; } - if (strcasecmp(name, "utf16be") == 0) { + if (strcasecmp(name_of_file, "utf16be") == 0) { return UTF16BE; } - if (strcasecmp(name, "utf16le") == 0) { + if (strcasecmp(name_of_file, "utf16le") == 0) { return UTF16LE; } - if (strcasecmp(name, "bytes") == 0) { + if (strcasecmp(name_of_file, "bytes") == 0) { return RAW; } /* try for an ICU converter */ - cnv = ucnv_open(name, &err); + cnv = ucnv_open(name_of_file, &err); if (cnv == NULL) { begin_diagnostic(); print_nl('U'); /* ensure message starts on a new line */ print_c_string("nknown encoding `"); - print_c_string(name); + print_c_string(name_of_file); print_c_string("'; reading as raw bytes"); end_diagnostic(1); return RAW; } else { ucnv_close(cnv); - *info = maketexstring(name); + *info = maketexstring(name_of_file); return ICUMAPPING; } } @@ -235,7 +234,7 @@ static char *saved_mapping_name = NULL; void check_for_tfm_font_mapping(void) { - char* cp = strstr((char*)name_of_file + 1, ":mapping="); + char* cp = strstr(name_of_file, ":mapping="); saved_mapping_name = mfree(saved_mapping_name); if (cp != NULL) { *cp = 0; @@ -711,7 +710,7 @@ splitFontName(char* name, char** var, char** feat, char** end, int* index) } void* -find_native_font(unsigned char* uname, int32_t scaled_size) +find_native_font(char* uname, int32_t scaled_size) /* scaled_size here is in TeX points, or is a negative integer for 'scaled_t' */ { void* rval = NULL; @@ -798,9 +797,8 @@ find_native_font(unsigned char* uname, int32_t scaled_size) if (varString != NULL) name_length += strlen(varString) + 1; free(name_of_file); - name_of_file = xmalloc(name_length + 4); /* +2 would be correct: initial space, final NUL */ - name_of_file[0] = ' '; - strcpy((char*)name_of_file + 1, fullName); + name_of_file = xmalloc(name_length + 3); /* +1 would be correct here (trailing \0). +3 is kept for Pascal-like access patterns. */ + strcpy(name_of_file, fullName); if (scaled_size < 0) { font = createFont(fontRef, scaled_size); @@ -843,14 +841,14 @@ find_native_font(unsigned char* uname, int32_t scaled_size) /* append the style and feature strings, so that \show\fontID will give a full result */ if (varString != NULL && *varString != 0) { - strcat((char*)name_of_file + 1, "/"); - strcat((char*)name_of_file + 1, varString); + strcat(name_of_file, "/"); + strcat(name_of_file, varString); } if (featString != NULL && *featString != 0) { - strcat((char*)name_of_file + 1, ":"); - strcat((char*)name_of_file + 1, featString); + strcat(name_of_file, ":"); + strcat(name_of_file, featString); } - name_length = strlen((char*)name_of_file + 1); + name_length = strlen(name_of_file); } } @@ -1035,7 +1033,7 @@ gr_font_get_named(int32_t what, void* pEngine) XeTeXLayoutEngine engine = (XeTeXLayoutEngine)pEngine; switch (what) { case XeTeX_find_feature_by_name: - rval = findGraphiteFeatureNamed(engine, (const char*)name_of_file + 1, name_length); + rval = findGraphiteFeatureNamed(engine, name_of_file, name_length); break; } return rval; @@ -1048,7 +1046,7 @@ gr_font_get_named_1(int32_t what, void* pEngine, int32_t param) XeTeXLayoutEngine engine = (XeTeXLayoutEngine)pEngine; switch (what) { case XeTeX_find_selector_by_name: - rval = findGraphiteFeatureSettingNamed(engine, param, (const char*)name_of_file + 1, name_length); + rval = findGraphiteFeatureSettingNamed(engine, param, name_of_file, name_length); break; } return rval; @@ -1828,15 +1826,15 @@ map_char_to_glyph(int32_t font, int32_t ch) int32_t map_glyph_to_index(int32_t font) - /* glyph name is at name_of_file+1 */ + /* glyph name is at name_of_file */ { #ifdef XETEX_MAC if (font_area[font] == AAT_FONT_FLAG) - return MapGlyphToIndex_AAT((CFDictionaryRef)(font_layout_engine[font]), (const char*)name_of_file + 1); + return MapGlyphToIndex_AAT((CFDictionaryRef)(font_layout_engine[font]), name_of_file); else #endif if (font_area[font] == OTGR_FONT_FLAG) - return mapGlyphToIndex((XeTeXLayoutEngine)(font_layout_engine[font]), (const char*)name_of_file + 1); + return mapGlyphToIndex((XeTeXLayoutEngine)(font_layout_engine[font]), name_of_file); else _tt_abort("bad native font flag in `map_glyph_to_index`"); } @@ -2021,7 +2019,7 @@ aat_font_get_named(int what, CFDictionaryRef attributes) CFArrayRef features = CTFontCopyFeatures(font); if (features) { CFDictionaryRef feature = findDictionaryInArray(features, kCTFontFeatureTypeNameKey, - (const char*)name_of_file + 1, name_length); + name_of_file, name_length); if (feature) { CFNumberRef identifier = CFDictionaryGetValue(feature, kCTFontFeatureTypeIdentifierKey); CFNumberGetValue(identifier, kCFNumberIntType, &rval); @@ -2047,7 +2045,7 @@ aat_font_get_named_1(int what, CFDictionaryRef attributes, int param) if (features) { CFDictionaryRef feature = findDictionaryInArrayWithIdentifier(features, kCTFontFeatureTypeIdentifierKey, param); if (feature) { - CFNumberRef selector = findSelectorByName(feature, (const char*)name_of_file + 1, name_length); + CFNumberRef selector = findSelectorByName(feature, name_of_file, name_length); if (selector) CFNumberGetValue(selector, kCFNumberIntType, &rval); } diff --git a/tectonic/XeTeX_ext.h b/tectonic/XeTeX_ext.h index 1891c79f7..37a702ce0 100644 --- a/tectonic/XeTeX_ext.h +++ b/tectonic/XeTeX_ext.h @@ -108,7 +108,7 @@ int linebreak_next(void); int get_encoding_mode_and_info(int32_t* info); void print_utf8_str(const unsigned char* str, int len); void print_chars(const unsigned short* str, int len); -void* find_native_font(unsigned char* name, int32_t scaled_size); +void* find_native_font(char* name, int32_t scaled_size); void release_font_engine(void* engine, int type_flag); int readCommonFeatures(const char* feat, const char* end, float* extend, float* slant, float* embolden, float* letterspace, uint32_t* rgbValue); diff --git a/tectonic/XeTeX_pic.c b/tectonic/XeTeX_pic.c index 5faad4175..c4ed11352 100644 --- a/tectonic/XeTeX_pic.c +++ b/tectonic/XeTeX_pic.c @@ -57,11 +57,11 @@ count_pdf_file_pages (void) rust_input_handle_t handle; pdf_file *pf; - handle = ttstub_input_open ((const char *) name_of_file + 1, TTIF_PICT, 0); + handle = ttstub_input_open (name_of_file, TTIF_PICT, 0); if (handle == NULL) return 0; - if ((pf = pdf_open((const char *) name_of_file + 1, handle)) == NULL) { + if ((pf = pdf_open(name_of_file, handle)) == NULL) { /* TODO: issue warning */ ttstub_input_close(handle); return 0; @@ -200,11 +200,10 @@ get_image_size_in_inches (rust_input_handle_t handle, float *width, float *heigh int find_pic_file (char **path, real_rect *bounds, int pdfBoxType, int page) { - char *in_path = (char *) name_of_file + 1; int err = -1; rust_input_handle_t handle; - handle = ttstub_input_open (in_path, TTIF_PICT, 0); + handle = ttstub_input_open (name_of_file, TTIF_PICT, 0); bounds->x = bounds->y = bounds->wd = bounds->ht = 0.0; if (handle == NULL) @@ -212,7 +211,7 @@ find_pic_file (char **path, real_rect *bounds, int pdfBoxType, int page) if (pdfBoxType != 0) { /* if cmd was \XeTeXpdffile, use xpdflib to read it */ - err = pdf_get_rect (in_path, handle, page, pdfBoxType, bounds); + err = pdf_get_rect (name_of_file, handle, page, pdfBoxType, bounds); } else { err = get_image_size_in_inches (handle, &bounds->wd, &bounds->ht); bounds->wd *= 72.27; @@ -220,7 +219,7 @@ find_pic_file (char **path, real_rect *bounds, int pdfBoxType, int page) } if (err == 0) - *path = xstrdup(in_path); + *path = xstrdup(name_of_file); ttstub_input_close (handle); diff --git a/tectonic/io.c b/tectonic/io.c index 3cf53e76c..8585ec475 100644 --- a/tectonic/io.c +++ b/tectonic/io.c @@ -28,7 +28,7 @@ tt_open_input (int filefmt) fullnameoffile = mfree(fullnameoffile); - fname = (char *) name_of_file + 1; + fname = name_of_file; if (filefmt == TTIF_TECTONIC_PRIMARY) handle = ttstub_input_open_primary (); @@ -40,8 +40,8 @@ tt_open_input (int filefmt) fullnameoffile = xstrdup(fname); name_length = strlen(fname); - name_of_file = xmalloc(name_length + 2); - strcpy((char *) name_of_file + 1, fname); + name_of_file = xmalloc(name_length + 1); + strcpy(name_of_file, fname); return handle; } @@ -475,7 +475,7 @@ get_uni_c(UFILE* f) void make_utf16_name(void) { - unsigned char* s = name_of_file + 1; + unsigned char* s = (unsigned char *) name_of_file; uint32_t rval; uint16_t* t; static int name16len = 0; @@ -485,7 +485,8 @@ make_utf16_name(void) name_of_file16 = xcalloc(name16len, sizeof(uint16_t)); } t = name_of_file16; - while (s <= name_of_file + name_length) { + + while (s < name_of_file + name_length) { uint16_t extraBytes; rval = *(s++); extraBytes = bytesFromUTF8[rval]; diff --git a/tectonic/xetex-shipout.c b/tectonic/xetex-shipout.c index 69a22900c..2145b9d2c 100644 --- a/tectonic/xetex-shipout.c +++ b/tectonic/xetex-shipout.c @@ -164,9 +164,9 @@ ship_out(int32_t p) if (job_name == 0) open_log_file(); pack_job_name(output_file_extension); - dvi_file = ttstub_output_open ((const char *) name_of_file + 1, 0); + dvi_file = ttstub_output_open (name_of_file, 0); if (dvi_file == NULL) - _tt_abort ("cannot open output file \"%s\"", name_of_file + 1); + _tt_abort ("cannot open output file \"%s\"", name_of_file); output_file_name = make_name_string(); } @@ -1697,9 +1697,9 @@ out_what(int32_t p) pack_file_name(cur_name, cur_area, cur_ext); - write_file[j] = ttstub_output_open((const char *) name_of_file + 1, 0); + write_file[j] = ttstub_output_open(name_of_file, 0); if (write_file[j] == NULL) - _tt_abort("cannot open output file \"%s\"", name_of_file + 1); + _tt_abort("cannot open output file \"%s\"", name_of_file); write_open[j] = true; diff --git a/tectonic/xetex0.c b/tectonic/xetex0.c index a3483cf72..960d9e459 100644 --- a/tectonic/xetex0.c +++ b/tectonic/xetex0.c @@ -10597,7 +10597,6 @@ pack_file_name(str_number n, str_number a, str_number e) str_number make_name_string(void) { - str_number Result; int32_t k; pool_pointer save_area_delimiter, save_ext_delimiter; bool save_name_in_progress, save_stop_at_space; @@ -10607,10 +10606,11 @@ make_name_string(void) make_utf16_name(); - for (k = 0; k <= name_length16 - 1; k++) + for (k = 0; k < name_length16; k++) str_pool[pool_ptr++] = name_of_file16[k]; - Result = make_string(); + + str_number Result = make_string(); save_area_delimiter = area_delimiter; save_ext_delimiter = ext_delimiter; @@ -10683,9 +10683,9 @@ open_log_file(void) pack_job_name(".log"); - log_file = ttstub_output_open ((const char *) name_of_file + 1, 0); + log_file = ttstub_output_open (name_of_file, 0); if (log_file == NULL) - _tt_abort ("cannot open log file output \"%s\"", name_of_file + 1); + _tt_abort ("cannot open log file output \"%s\"", name_of_file); texmf_log_name = make_name_string(); selector = SELECTOR_LOG_ONLY; @@ -10750,7 +10750,7 @@ start_input(const char *primary_input_name) if (!u_open_in(&input_file[cur_input.index], format, "rb", INTPAR(xetex_default_input_mode), INTPAR(xetex_default_input_encoding))) - _tt_abort ("failed to open input file \"%s\"", name_of_file + 1); + _tt_abort ("failed to open input file \"%s\"", name_of_file); /* Now re-encode `name_of_file` into the UTF-16 variable `name_of_file16`, * and use that to recompute `cur_{name,area,ext}`. */ @@ -11076,7 +11076,7 @@ internal_font_number load_native_font(int32_t u, str_number nom, str_number aire scaled_t ascent, descent, font_slant, x_ht, cap_ht; internal_font_number f; str_number full_name; - font_engine = find_native_font(name_of_file + 1, s); + font_engine = find_native_font(name_of_file, s); if (!font_engine) return FONT_BASE; if (s >= 0) @@ -11342,7 +11342,7 @@ read_font_info(int32_t u, str_number nom, str_number aire, scaled_t s) if (INTPAR(xetex_tracing_fonts) > 0) { begin_diagnostic(); print_nl_cstr("Requested font \""); - print_c_string((char *) (name_of_file + 1)); + print_c_string(name_of_file); print('"'); if (s < 0) { print_cstr(" scaled "); @@ -11775,7 +11775,7 @@ read_font_info(int32_t u, str_number nom, str_number aire, scaled_t s) } else if (file_opened) { begin_diagnostic(); print_nl_cstr(" -> "); - print_c_string((char *) (name_of_file + 1)); + print_c_string(name_of_file); end_diagnostic(false); } } diff --git a/tectonic/xetexd.h b/tectonic/xetexd.h index 98400378c..e6457e463 100644 --- a/tectonic/xetexd.h +++ b/tectonic/xetexd.h @@ -317,7 +317,7 @@ void remember_source_info(str_number, int); extern memory_word *eqtb; extern int32_t bad; -extern UTF8_code *name_of_file; +extern char *name_of_file; extern UTF16_code *name_of_file16; extern int32_t name_length; extern int32_t name_length16; diff --git a/tectonic/xetexini.c b/tectonic/xetexini.c index fbf59b9df..430cbbadd 100644 --- a/tectonic/xetexini.c +++ b/tectonic/xetexini.c @@ -13,7 +13,7 @@ /* All the following variables are declared in xetexd.h */ memory_word *eqtb; int32_t bad; -UTF8_code *name_of_file; +char *name_of_file; UTF16_code *name_of_file16; int32_t name_length; int32_t name_length16; @@ -455,7 +455,7 @@ do_dump (char *p, size_t item_size, size_t nitems, rust_output_handle_t out_file ssize_t r = ttstub_output_write (out_file, p, item_size * nitems); if (r < 0 || (size_t) r != item_size * nitems) _tt_abort ("could not write %zu %zu-byte item(s) to %s", - nitems, item_size, name_of_file+1); + nitems, item_size, name_of_file); /* Have to restore the old contents of memory, since some of it might get used again. */ @@ -471,7 +471,7 @@ do_undump (char *p, size_t item_size, size_t nitems, rust_input_handle_t in_file ssize_t r = ttstub_input_read (in_file, p, item_size * nitems); if (r < 0 || (size_t) r != item_size * nitems) _tt_abort("could not undump %zu %zu-byte item(s) from %s", - nitems, item_size, name_of_file+1); + nitems, item_size, name_of_file); swap_items (p, nitems, item_size); } @@ -2193,9 +2193,9 @@ store_fmt_file(void) format_ident = make_string(); pack_job_name(".fmt"); - fmt_out = ttstub_output_open ((const char *) name_of_file + 1, 0); + fmt_out = ttstub_output_open (name_of_file, 0); if (fmt_out == NULL) - _tt_abort ("cannot open format output file \"%s\"", name_of_file + 1); + _tt_abort ("cannot open format output file \"%s\"", name_of_file); print_nl_cstr("Beginning to dump on file "); print(make_name_string()); @@ -2518,77 +2518,11 @@ store_fmt_file(void) static void pack_buffered_name(small_number n, int32_t a, int32_t b) { - int32_t k; - UTF16_code c; - int32_t j; - - if (n + b - a + 5 > INT32_MAX) - b = a + INT32_MAX - n - 5; - free(name_of_file); - name_of_file = xmalloc_array(UTF8_code, n + (b - a + 1) + 5); - - k = 0; - - for (j = 1; j <= n; j++) { - /* This junk is append_to_name(), inlined, and with UTF-8 decoding, I - * think. */ - c = TEX_format_default[j]; - k++; - if (k <= INT32_MAX) { - if (c < 128) { - name_of_file[k] = c; - } else if (c < 2048) { - name_of_file[k++] = 192 + c / 64; - name_of_file[k] = 128 + c % 64; - } else { - name_of_file[k++] = 224 + c / 4096; - name_of_file[k++] = 128 + (c % 4096) / 64; - name_of_file[k] = 128 + (c % 4096) % 64; - } - } - } - - for (j = a; j <= b; j++) { - c = buffer[j]; - k++; - if (k <= INT32_MAX) { - if (c < 128) { - name_of_file[k] = c; - } else if (c < 2048) { - name_of_file[k++] = 192 + c / 64; - name_of_file[k] = 128 + c % 64; - } else { - name_of_file[k++] = 224 + c / 4096; - name_of_file[k++] = 128 + (c % 4096) / 64; - name_of_file[k] = 128 + (c % 4096) % 64; - } - } - } + name_of_file = xmalloc_array(UTF8_code, format_default_length + 1); - for (j = format_default_length - 3; j <= format_default_length; j++) { - c = TEX_format_default[j]; - k++; - if (k <= INT32_MAX) { - if (c < 128) { - name_of_file[k] = c; - } else if (c < 2048) { - name_of_file[k++] = 192 + c / 64; - name_of_file[k] = 128 + c % 64; - } else { - name_of_file[k++] = 224 + c / 4096; - name_of_file[k++] = 128 + (c % 4096) / 64; - name_of_file[k] = 128 + (c % 4096) % 64; - } - } - } - - if (k <= INT32_MAX) - name_length = k; - else - name_length = INT32_MAX; - - name_of_file[name_length + 1] = 0; + strcpy(name_of_file, TEX_format_default + 1); + name_length = strlen(name_of_file); } @@ -2607,9 +2541,9 @@ load_fmt_file(void) pack_buffered_name(format_default_length - 4, 1, 0); - fmt_in = ttstub_input_open((const char *) name_of_file + 1, TTIF_FORMAT, 0); + fmt_in = ttstub_input_open(name_of_file, TTIF_FORMAT, 0); if (fmt_in == NULL) - _tt_abort("cannot open the format file \"%s\"", (char *) name_of_file + 1); + _tt_abort("cannot open the format file \"%s\"", name_of_file); cur_input.loc = j; @@ -2632,7 +2566,7 @@ load_fmt_file(void) undump_int(x); if (x != FORMAT_SERIAL) _tt_abort("format file \"%s\" is of the wrong version: expected %d, found %d", - (char *) name_of_file + 1, FORMAT_SERIAL, x); + name_of_file, FORMAT_SERIAL, x); /* hash table parameters */ @@ -3991,7 +3925,6 @@ tt_run_engine(char *dump_name, char *input_file_name) /* TEX_format_default must get a leading space character for Pascal * style string magic. */ - size_t len = strlen (dump_name); TEX_format_default = xmalloc (len + 2); TEX_format_default[0] = ' '; From 7d0827063e5e154fb568e376d10038675c8a2f15 Mon Sep 17 00:00:00 2001 From: Mrmaxmeier Date: Sat, 10 Feb 2018 19:48:41 +0100 Subject: [PATCH 4/9] fix conversion of two-codepoint utf16 chars in gettexstring --- tectonic/texmfmp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tectonic/texmfmp.c b/tectonic/texmfmp.c index e76febc87..77d5f0246 100644 --- a/tectonic/texmfmp.c +++ b/tectonic/texmfmp.c @@ -99,14 +99,15 @@ gettexstring (str_number s) name = xmalloc(len * 3 + 1); /* max UTF16->UTF8 expansion (code units, not bytes) */ for (i = 0, j = 0; i < len; i++) { - unsigned int c = str_pool[i + str_start[s - 65536L]]; + uint32_t c = str_pool[i + str_start[s - 65536L]]; if (c >= 0xD800 && c <= 0xDBFF) { - unsigned int lo = str_pool[++i + str_start[s - 65536L]]; + uint32_t lo = str_pool[++i + str_start[s - 65536L]]; if (lo >= 0xDC00 && lo <= 0xDFFF) - c = (c - 0xD800) * 0x0400 + lo - 0xDC00; + c = (c - 0xD800) * 0x0400 + lo - 0xDC00 + 0x10000; else c = 0xFFFD; } + if (c < 0x80) bytesToWrite = 1; else if (c < 0x800) From e3eed8c119482097a3fd18c2e59d67f5f954a338 Mon Sep 17 00:00:00 2001 From: Mrmaxmeier Date: Sat, 10 Feb 2018 19:49:28 +0100 Subject: [PATCH 5/9] tests/util: some context on panics --- tests/util/mod.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/util/mod.rs b/tests/util/mod.rs index 7bea8b7de..3257ea2fd 100644 --- a/tests/util/mod.rs +++ b/tests/util/mod.rs @@ -119,9 +119,9 @@ pub struct ExpectedInfo { impl ExpectedInfo { pub fn read>(path: P) -> Self { let path = path.as_ref(); - let name = path.file_name().unwrap().to_owned(); + let name = path.file_name().expect(&format!("coudn't get file_name of {:?}", path)).to_owned(); - let mut f = File::open(path).unwrap(); + let mut f = File::open(path).expect(&format!("failed to open {:?}", path)); let mut contents = Vec::new(); f.read_to_end(&mut contents).unwrap(); @@ -169,7 +169,11 @@ impl ExpectedInfo { pub fn test_from_collection(&self, files: &HashMap>) { if !self.gzipped { - self.test_data(files.get(&self.name).unwrap()) + if let Some(data) = files.get(&self.name) { + self.test_data(data) + } else { + panic!("{:?} not in {:?}", self.name, files.keys().collect::>()) + } } else { let mut buf = Vec::new(); let mut dec = GzDecoder::new(&files.get(&self.name).unwrap()[..]); From 14cd40fe598fb45b564712f892494c6158780be9 Mon Sep 17 00:00:00 2001 From: Mrmaxmeier Date: Sat, 10 Feb 2018 19:51:43 +0100 Subject: [PATCH 6/9] fix and simplify pack_file_name --- tectonic/xetex0.c | 95 +++++++++-------------------------------------- 1 file changed, 18 insertions(+), 77 deletions(-) diff --git a/tectonic/xetex0.c b/tectonic/xetex0.c index 960d9e459..a3c3f19f5 100644 --- a/tectonic/xetex0.c +++ b/tectonic/xetex0.c @@ -10509,88 +10509,29 @@ end_name(void) void pack_file_name(str_number n, str_number a, str_number e) { - int32_t k; - UTF16_code c; - pool_pointer j; - - k = 0; - - free(name_of_file); - name_of_file = xmalloc_array(UTF8_code, (length(a) + length(n) + length(e)) * 3 + 1); - - /* Note that we populate name_of_file in an order different than how the - * arguments are passed to this function! - */ - - for (j = str_start[a - 65536L]; j <= str_start[(a + 1) - 65536L] - 1; j++) { - c = str_pool[j]; - k++; - - if (k <= INT32_MAX) { - if (c < 128) { - name_of_file[k] = c; - } else if (c < 2048) { - name_of_file[k] = 192 + c / 64; - k++; - name_of_file[k] = 128 + c % 64; - } else { - name_of_file[k] = 224 + c / 4096; - k++; - name_of_file[k] = 128 + (c % 4096) / 64; - k++; - name_of_file[k] = 128 + (c % 4096) % 64; - } - } - } - - for (j = str_start[n - 65536L]; j <= str_start[(n + 1) - 65536L] - 1; j++) { - c = str_pool[j]; - k++; + // Note that we populate the buffer in an order different than how the + // arguments are passed to this function! + char* work_buffer = xmalloc_array(UTF8_code, (length(a) + length(n) + length(e)) * 3 + 1); + work_buffer[0] = '\0'; - if (k <= INT32_MAX) { - if (c < 128) { - name_of_file[k] = c; - } else if (c < 2048) { - name_of_file[k] = 192 + c / 64; - k++; - name_of_file[k] = 128 + c % 64; - } else { - name_of_file[k] = 224 + c / 4096; - k++; - name_of_file[k] = 128 + (c % 4096) / 64; - k++; - name_of_file[k] = 128 + (c % 4096) % 64; - } - } - } + char* a_utf8 = gettexstring(a); + strcat(work_buffer, a_utf8); + free(a_utf8); - for (j = str_start[e - 65536L]; j <= str_start[(e + 1) - 65536L] - 1; j++) { - c = str_pool[j]; - k++; + char* n_utf8 = gettexstring(n); + strcat(work_buffer, n_utf8); + free(n_utf8); - if (k <= INT32_MAX) { - if (c < 128) { - name_of_file[k] = c; - } else if (c < 2048) { - name_of_file[k] = 192 + c / 64; - k++; - name_of_file[k] = 128 + c % 64; - } else { - name_of_file[k] = 224 + c / 4096; - k++; - name_of_file[k] = 128 + (c % 4096) / 64; - k++; - name_of_file[k] = 128 + (c % 4096) % 64; - } - } - } + char* e_utf8 = gettexstring(e); + strcat(work_buffer, e_utf8); + free(e_utf8); - if (k <= INT32_MAX) - name_length = k; - else - name_length = INT32_MAX; + name_length = strlen(work_buffer); - name_of_file[name_length + 1] = 0; + free(name_of_file); + name_of_file = xmalloc_array(char, name_length + 1); + strcpy(name_of_file, work_buffer); + free(work_buffer); } From 9e50bd377bc01f902a5baf9379ea59fba782d4af Mon Sep 17 00:00:00 2001 From: Mrmaxmeier Date: Sat, 10 Feb 2018 20:15:42 +0100 Subject: [PATCH 7/9] start_input inline more_name and fix utf8->utf16 conversion --- tectonic/xetex0.c | 42 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/tectonic/xetex0.c b/tectonic/xetex0.c index a3c3f19f5..da79accbd 100644 --- a/tectonic/xetex0.c +++ b/tectonic/xetex0.c @@ -10655,7 +10655,6 @@ start_input(const char *primary_input_name) { tt_input_format_type format = TTIF_TEX; str_number temp_str; - int32_t k; if (primary_input_name != NULL) { /* If this is the case, we're opening the primary input file, and the @@ -10669,9 +10668,42 @@ start_input(const char *primary_input_name) name_in_progress = true; begin_name(); stop_at_space = false; - k = 0; - while (primary_input_name[k] && more_name(primary_input_name[k])) - k++; + + + + const unsigned char *cp = (const unsigned char *) primary_input_name; + + if (pool_ptr + strlen(primary_input_name) * 2 >= pool_size) + _tt_abort ("string pool overflow [%i bytes]", (int) pool_size); + + UInt32 rval; + while ((rval = *(cp++)) != 0) { + UInt16 extraBytes = bytesFromUTF8[rval]; + switch (extraBytes) { /* note: code falls through cases! */ + case 5: rval <<= 6; if (*cp) rval += *(cp++); + case 4: rval <<= 6; if (*cp) rval += *(cp++); + case 3: rval <<= 6; if (*cp) rval += *(cp++); + case 2: rval <<= 6; if (*cp) rval += *(cp++); + case 1: rval <<= 6; if (*cp) rval += *(cp++); + case 0: ; + }; + rval -= offsetsFromUTF8[extraBytes]; + if (rval > 0xffff) { + rval -= 0x10000; + str_pool[pool_ptr++] = 0xd800 + rval / 0x0400; + str_pool[pool_ptr++] = 0xdc00 + rval % 0x0400; + } else { + str_pool[pool_ptr++] = rval; + } + + if (IS_DIR_SEP(rval)) { + area_delimiter = pool_ptr - str_start[str_ptr - 65536L]; + ext_delimiter = 0; + } else if (rval == '.' ) { + ext_delimiter = pool_ptr - str_start[str_ptr - 65536L]; + } + } + stop_at_space = true; end_name(); name_in_progress = false; @@ -10700,7 +10732,7 @@ start_input(const char *primary_input_name) name_in_progress = true; begin_name(); stop_at_space = false; - k = 0; + int k = 0; while (k < name_length16 && more_name(name_of_file16[k])) k++; stop_at_space = true; From 82386e92e56231d16d290938aaa09358dd19efb1 Mon Sep 17 00:00:00 2001 From: Mrmaxmeier Date: Sat, 10 Feb 2018 19:51:03 +0100 Subject: [PATCH 8/9] add unicode_file_name test --- tests/tex-outputs.rs | 3 +++ ...en \360\237\220\250 welt \360\237\214\215.log" | 7 +++++++ ...en \360\237\220\250 welt \360\237\214\215.tex" | 3 +++ ...en \360\237\220\250 welt \360\237\214\215.xdv" | Bin 0 -> 248 bytes 4 files changed, 13 insertions(+) create mode 100644 "tests/tex-outputs/hall\303\266chen \360\237\220\250 welt \360\237\214\215.log" create mode 100644 "tests/tex-outputs/hall\303\266chen \360\237\220\250 welt \360\237\214\215.tex" create mode 100644 "tests/tex-outputs/hall\303\266chen \360\237\220\250 welt \360\237\214\215.xdv" diff --git a/tests/tex-outputs.rs b/tests/tex-outputs.rs index f9b4b38cb..3e1cec876 100644 --- a/tests/tex-outputs.rs +++ b/tests/tex-outputs.rs @@ -160,6 +160,9 @@ fn pdfoutput() { TestCase::new("pdfoutput").go() } #[test] fn synctex() { TestCase::new("synctex").check_synctex(true).go() } +#[test] +fn unicode_file_name() { TestCase::new("hallöchen 🐨 welt 🌍.tex").expect(Ok(TexResult::Warnings)).go() } + #[test] fn tectoniccodatokens_errinside() { TestCase::new("tectoniccodatokens_errinside") diff --git "a/tests/tex-outputs/hall\303\266chen \360\237\220\250 welt \360\237\214\215.log" "b/tests/tex-outputs/hall\303\266chen \360\237\220\250 welt \360\237\214\215.log" new file mode 100644 index 000000000..61c488b38 --- /dev/null +++ "b/tests/tex-outputs/hall\303\266chen \360\237\220\250 welt \360\237\214\215.log" @@ -0,0 +1,7 @@ +** +(hallöchen 🐨 welt 🌍.tex +Missing character: There is no ö in font cmr10! +Missing character: There is no 🐨 in font cmr10! +Missing character: There is no 🌍 in font cmr10! + [1] ) +Output written on hallöchen 🐨 welt 🌍.xdv (1 page, 248 bytes). diff --git "a/tests/tex-outputs/hall\303\266chen \360\237\220\250 welt \360\237\214\215.tex" "b/tests/tex-outputs/hall\303\266chen \360\237\220\250 welt \360\237\214\215.tex" new file mode 100644 index 000000000..d39483abf --- /dev/null +++ "b/tests/tex-outputs/hall\303\266chen \360\237\220\250 welt \360\237\214\215.tex" @@ -0,0 +1,3 @@ +hello world +\jobname +\bye diff --git "a/tests/tex-outputs/hall\303\266chen \360\237\220\250 welt \360\237\214\215.xdv" "b/tests/tex-outputs/hall\303\266chen \360\237\220\250 welt \360\237\214\215.xdv" new file mode 100644 index 0000000000000000000000000000000000000000..d177edc24bf4070b8b4a9115561397e478487286 GIT binary patch literal 248 zcmey)&e%NZfQ&T*5HP>sC`nB&$ Date: Tue, 27 Feb 2018 14:54:29 +0100 Subject: [PATCH 9/9] test reading of utf16{be,le} encoded files --- tests/tex-outputs.rs | 89 +++++++++++++++----- tests/tex-outputs/file_encoding.log | 10 +++ tests/tex-outputs/file_encoding.tex | 7 ++ tests/tex-outputs/file_encoding.xdv | Bin 0 -> 300 bytes tests/tex-outputs/file_encoding_utf16be.txt | Bin 0 -> 58 bytes tests/tex-outputs/file_encoding_utf16le.txt | Bin 0 -> 64 bytes tests/tex-outputs/file_encoding_utf8.txt | 2 + 7 files changed, 87 insertions(+), 21 deletions(-) create mode 100644 tests/tex-outputs/file_encoding.log create mode 100644 tests/tex-outputs/file_encoding.tex create mode 100644 tests/tex-outputs/file_encoding.xdv create mode 100644 tests/tex-outputs/file_encoding_utf16be.txt create mode 100644 tests/tex-outputs/file_encoding_utf16le.txt create mode 100644 tests/tex-outputs/file_encoding_utf8.txt diff --git a/tests/tex-outputs.rs b/tests/tex-outputs.rs index 3e1cec876..227f81f93 100644 --- a/tests/tex-outputs.rs +++ b/tests/tex-outputs.rs @@ -1,23 +1,25 @@ // Copyright 2016-2018 the Tectonic Project // Licensed under the MIT License. -#[macro_use] extern crate lazy_static; +#[macro_use] +extern crate lazy_static; extern crate tectonic; use std::collections::HashSet; use std::env; use std::sync::Mutex; +use std::path::Path; -use tectonic::errors::{DefinitelySame, ErrorKind, Result}; -use tectonic::engines::NoopIoEventBackend; use tectonic::engines::tex::TexResult; -use tectonic::io::{FilesystemIo, FilesystemPrimaryInputIo, IoStack, MemoryIo}; +use tectonic::engines::NoopIoEventBackend; +use tectonic::errors::{DefinitelySame, ErrorKind, Result}; use tectonic::io::testing::SingleInputFileIo; +use tectonic::io::{FilesystemIo, FilesystemPrimaryInputIo, IoProvider, IoStack, MemoryIo}; use tectonic::status::NoopStatusBackend; use tectonic::{TexEngine, XdvipdfmxEngine}; mod util; -use util::{ExpectedInfo, ensure_plain_format, test_path}; +use util::{ensure_plain_format, test_path, ExpectedInfo}; lazy_static! { static ref LOCK: Mutex = Mutex::new(0u8); @@ -28,9 +30,9 @@ struct TestCase { expected_result: Result, check_synctex: bool, check_pdf: bool, + extra_io: Vec>, } - impl TestCase { fn new(stem: &str) -> Self { TestCase { @@ -38,6 +40,7 @@ impl TestCase { expected_result: Ok(TexResult::Spotless), check_synctex: false, check_pdf: false, + extra_io: Vec::new(), } } @@ -51,6 +54,16 @@ impl TestCase { self } + fn with_fs(&mut self, path: &Path) -> &mut Self { + self.extra_io.push(Box::new(FilesystemIo::new( + path, + false, + false, + HashSet::new(), + ))); + self + } + fn expect(&mut self, result: Result) -> &mut Self { self.expected_result = result; self @@ -60,7 +73,7 @@ impl TestCase { self.expect(Err(ErrorKind::Msg(msg.to_owned()).into())) } - fn go(&self) { + fn go(&mut self) { let _guard = LOCK.lock().unwrap(); // until we're thread-safe ... let expect_xdv = self.expected_result.is_ok(); @@ -69,7 +82,8 @@ impl TestCase { // IoProvider for the format file; with magic to generate the format // on-the-fly if needed. - let mut fmt = SingleInputFileIo::new(&ensure_plain_format().expect("couldn't write format file")); + let mut fmt = + SingleInputFileIo::new(&ensure_plain_format().expect("couldn't write format file")); // Set up some useful paths, and the IoProvider for the primary input file. p.push("tex-outputs"); @@ -96,12 +110,17 @@ impl TestCase { // Run the engine(s)! let res = { - let mut io = IoStack::new(vec![&mut mem, &mut tex, &mut fmt, &mut assets]); + let mut io_list: Vec<&mut IoProvider> = vec![&mut mem, &mut tex, &mut fmt, &mut assets]; + for io in &mut self.extra_io { + io_list.push(&mut **io); + } + let mut io = IoStack::new(io_list); + let mut events = NoopIoEventBackend::new(); let mut status = NoopStatusBackend::new(); - let tex_res = TexEngine::new() - .process(&mut io, &mut events, &mut status, "plain.fmt", &texname); + let tex_res = + TexEngine::new().process(&mut io, &mut events, &mut status, "plain.fmt", &texname); if self.check_pdf && tex_res.definitely_same(&Ok(TexResult::Spotless)) { // While the xdv and log output is deterministic without setting @@ -119,7 +138,10 @@ impl TestCase { }; if !res.definitely_same(&self.expected_result) { - panic!(format!("expected TeX result {:?}, got {:?}", self.expected_result, res)); + panic!(format!( + "expected TeX result {:?}, got {:?}", + self.expected_result, res + )); } // Check that outputs match expectations. @@ -142,26 +164,47 @@ impl TestCase { } } - // Keep these alphabetized. #[test] -fn md5_of_hello() { TestCase::new("md5_of_hello").check_pdf(true).go() } +fn md5_of_hello() { + TestCase::new("md5_of_hello").check_pdf(true).go() +} + +#[test] +fn negative_roman_numeral() { + TestCase::new("negative_roman_numeral").go() +} #[test] -fn negative_roman_numeral() { TestCase::new("negative_roman_numeral").go() } +fn tex_logo() { + TestCase::new("tex_logo").go() +} #[test] -fn tex_logo() { TestCase::new("tex_logo").go() } +fn pdfoutput() { + TestCase::new("pdfoutput").go() +} #[test] -fn pdfoutput() { TestCase::new("pdfoutput").go() } +fn synctex() { + TestCase::new("synctex").check_synctex(true).go() +} #[test] -fn synctex() { TestCase::new("synctex").check_synctex(true).go() } +fn unicode_file_name() { + TestCase::new("hallöchen 🐨 welt 🌍.tex") + .expect(Ok(TexResult::Warnings)) + .go() +} #[test] -fn unicode_file_name() { TestCase::new("hallöchen 🐨 welt 🌍.tex").expect(Ok(TexResult::Warnings)).go() } +fn file_encoding() { + TestCase::new("file_encoding.tex") + .with_fs(&test_path(&["tex-outputs"])) + .expect(Ok(TexResult::Warnings)) + .go() +} #[test] fn tectoniccodatokens_errinside() { @@ -178,7 +221,11 @@ fn tectoniccodatokens_noend() { } #[test] -fn tectoniccodatokens_ok() { TestCase::new("tectoniccodatokens_ok").go() } +fn tectoniccodatokens_ok() { + TestCase::new("tectoniccodatokens_ok").go() +} #[test] -fn the_letter_a() { TestCase::new("the_letter_a").check_pdf(true).go() } +fn the_letter_a() { + TestCase::new("the_letter_a").check_pdf(true).go() +} diff --git a/tests/tex-outputs/file_encoding.log b/tests/tex-outputs/file_encoding.log new file mode 100644 index 000000000..d536d5308 --- /dev/null +++ b/tests/tex-outputs/file_encoding.log @@ -0,0 +1,10 @@ +** +(file_encoding.tex (file_encoding_utf8.txt +Missing character: There is no 🌍 in font cmr10! +) (file_encoding_utf16be.txt +Missing character: There is no 🌍 in font cmr10! +) +(file_encoding_utf16le.txt +Missing character: There is no 🌍 in font cmr10! +) [1] ) +Output written on file_encoding.xdv (1 page, 300 bytes). diff --git a/tests/tex-outputs/file_encoding.tex b/tests/tex-outputs/file_encoding.tex new file mode 100644 index 000000000..f1faf87d0 --- /dev/null +++ b/tests/tex-outputs/file_encoding.tex @@ -0,0 +1,7 @@ +\input file_encoding_utf8.txt + +\input file_encoding_utf16be.txt + +\input file_encoding_utf16le.txt + +\bye diff --git a/tests/tex-outputs/file_encoding.xdv b/tests/tex-outputs/file_encoding.xdv new file mode 100644 index 0000000000000000000000000000000000000000..f11c070da7269dab4b8d9ee934b0b7558d6e5dd5 GIT binary patch literal 300 zcmey)&e%NZfQ&T*5HP>sC`nB&$626QQZf_s`W9kWk&{_cl9LKk)Hh!OXw)Qz o7YU0E`+9x=trSQ0Fw<<~TZ~r^x-&2_Fk*GuPoR!>?DxR{0CcNgrvLx| literal 0 HcmV?d00001 diff --git a/tests/tex-outputs/file_encoding_utf16be.txt b/tests/tex-outputs/file_encoding_utf16be.txt new file mode 100644 index 0000000000000000000000000000000000000000..8659b689c5f880b063ad561fd022cd3cfaa24f1e GIT binary patch literal 58 zcmezOpP_^ygCUclm_Y$bmI6i67z`QAfFemiv2-Av%8