Skip to content

Commit

Permalink
Further optimize font width loading (Issue #354)
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelrsweet committed Aug 29, 2019
1 parent 3992ca7 commit 66d663f
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 76 deletions.
3 changes: 2 additions & 1 deletion htmldoc/html.h
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ extern typeface_t _htmlBodyFont,
_htmlHeadingFont;
extern int _htmlInitialized;
extern char _htmlCharSet[];
extern int _htmlWidthsLoaded[TYPE_MAX][STYLE_MAX];
extern short _htmlWidths[TYPE_MAX][STYLE_MAX][256];
extern short _htmlWidthsAll[TYPE_MAX][STYLE_MAX][65536];
extern int _htmlUnicode[];
Expand Down Expand Up @@ -318,7 +319,7 @@ extern void htmlSetBaseSize(double p, double s);
extern void htmlSetCharSet(const char *cs);
extern void htmlSetTextColor(uchar *color);

extern void htmlLoadFontWidths(void);
extern void htmlLoadFontWidths(int typeface, int style);

extern void htmlDebugStats(const char *title, tree_t *t);

Expand Down
154 changes: 81 additions & 73 deletions htmldoc/htmllib.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,17 @@ typeface_t _htmlBodyFont = TYPE_TIMES,
int _htmlInitialized = 0; /* Initialized glyphs yet? */
char _htmlCharSet[256] = "iso-8859-1";
/* Character set name */
int _htmlWidthsLoaded[TYPE_MAX][STYLE_MAX];
/* Have the widths been loaded?  This is the
 * definition, so it must not be marked
 * "extern" (the extern declaration is in
 * html.h).  No explicit initializer is
 * needed: objects with static storage
 * duration are zero-initialized, so every
 * typeface/style pair starts out "not
 * loaded" regardless of the values of
 * TYPE_MAX and STYLE_MAX. */
short _htmlWidths[TYPE_MAX][STYLE_MAX][256];
/* Character widths of fonts */
short _htmlWidthsAll[TYPE_MAX][STYLE_MAX][65536];
Expand Down Expand Up @@ -2225,9 +2236,8 @@ htmlGetVariable(tree_t *t, /* I - Tree entry */
*/

void
htmlLoadFontWidths(void)
htmlLoadFontWidths(int typeface, int style)
{
int i, j; /* Looping vars */
char filename[1024]; /* Filenames */
FILE *fp; /* Files */
int ch; /* Character */
Expand All @@ -2240,91 +2250,83 @@ htmlLoadFontWidths(void)
* Now read all of the font widths...
*/

for (i = 0; i < TYPE_MAX; i ++)
{
for (j = 0; j < STYLE_MAX; j ++)
{
for (ch = 0; ch < 256; ch ++)
_htmlWidths[i][j][ch] = 600;
for (ch = 0; ch < 256; ch ++)
_htmlWidths[typeface][style][ch] = 600;

if (_htmlUTF8)
{
for (ch = 0; ch < 65536; ch ++)
_htmlWidthsAll[i][j][ch] = 600;
}
if (_htmlUTF8)
{
for (ch = 0; ch < 65536; ch ++)
_htmlWidthsAll[typeface][style][ch] = 600;
}

snprintf(filename, sizeof(filename), "%s/fonts/%s.afm", _htmlData,
_htmlFonts[i][j]);
if ((fp = fopen(filename, "r")) == NULL)
{
snprintf(filename, sizeof(filename), "%s/fonts/%s.afm", _htmlData, _htmlFonts[typeface][style]);
if ((fp = fopen(filename, "r")) == NULL)
{
#ifndef DEBUG
progress_error(HD_ERROR_FILE_NOT_FOUND,
"Unable to open font width file %s!", filename);
progress_error(HD_ERROR_FILE_NOT_FOUND, "Unable to open font width file %s!", filename);
#endif /* !DEBUG */
continue;
}

while (fgets(line, sizeof(line), fp) != NULL)
{
if (strncmp(line, "C ", 2) != 0)
continue;
return;
}

if (i < TYPE_SYMBOL)
{
/*
* Handle encoding of Courier, Times, and Helvetica using
* assigned charset...
*/
while (fgets(line, sizeof(line), fp) != NULL)
{
if (strncmp(line, "C ", 2) != 0)
continue;

if (sscanf(line, "%*s%*s%*s%*s%f%*s%*s%63s", &width, glyph) != 2)
continue;
if (typeface < TYPE_SYMBOL)
{
/*
* Handle encoding of regular fonts using assigned charset...
*/

for (ch = 0; ch < 256; ch ++)
{
if (_htmlGlyphs[ch] && !strcmp(_htmlGlyphs[ch], glyph))
{
_htmlWidths[i][j][ch] = (short)width;
break;
}
}
if (sscanf(line, "%*s%*s%*s%*s%f%*s%*s%63s", &width, glyph) != 2)
continue;

if (_htmlUTF8)
{
for (ch = 0; ch < 65536; ch ++)
{
if (_htmlGlyphsAll[ch] && !strcmp(_htmlGlyphsAll[ch], glyph))
{
_htmlWidthsAll[i][j][ch] = (short)width;
break;
}
}
}
}
else
for (ch = 0; ch < 256; ch ++)
{
if (_htmlGlyphs[ch] && !strcmp(_htmlGlyphs[ch], glyph))
{
/*
* Symbol and dingbats fonts uses their own encoding...
*/

if (sscanf(line, "%*s%d%*s%*s%f", &ch, &width) != 2)
continue;
_htmlWidths[typeface][style][ch] = (short)width;
break;
}
}

if (ch < 256 && ch >= 0)
{
_htmlWidths[i][j][ch] = (short)width;
_htmlWidthsAll[i][j][ch] = (short)width;
}
if (_htmlUTF8)
{
for (ch = 0; ch < 65536; ch ++)
{
if (_htmlGlyphsAll[ch] && !strcmp(_htmlGlyphsAll[ch], glyph))
{
_htmlWidthsAll[typeface][style][ch] = (short)width;
break;
}
}
}
}
else
{
/*
* Symbol and Dingbats fonts uses their own encoding...
*/

fclose(fp);
if (sscanf(line, "%*s%d%*s%*s%f", &ch, &width) != 2)
continue;

// Make sure that non-breaking space has the same width as
// a breaking space...
_htmlWidths[i][j][160] = _htmlWidths[i][j][32];
_htmlWidthsAll[i][j][160] = _htmlWidthsAll[i][j][32];
if (ch < 256 && ch >= 0)
{
_htmlWidths[typeface][style][ch] = (short)width;
_htmlWidthsAll[typeface][style][ch] = (short)width;
}
}
}

fclose(fp);

// Make sure that non-breaking space has the same width as a breaking space...
_htmlWidths[typeface][style][160] = _htmlWidths[typeface][style][32];
_htmlWidthsAll[typeface][style][160] = _htmlWidthsAll[typeface][style][32];

_htmlWidthsLoaded[typeface][style] = 1;
}


Expand Down Expand Up @@ -2480,7 +2482,7 @@ htmlSetCharSet(const char *cs) /* I - Character set file to load */
_htmlUnicode[i] = i;
}

htmlLoadFontWidths();
memset(_htmlWidthsLoaded, 0, sizeof(_htmlWidthsLoaded));
return;
}

Expand Down Expand Up @@ -2539,7 +2541,7 @@ htmlSetCharSet(const char *cs) /* I - Character set file to load */
_htmlUnicode[i] = chars[i];
}

htmlLoadFontWidths();
memset(_htmlWidthsLoaded, 0, sizeof(_htmlWidthsLoaded));
}


Expand Down Expand Up @@ -3304,10 +3306,16 @@ compute_size(tree_t *t) /* I - Tree entry */
if (int_width > max_width)
max_width = int_width;

if (!_htmlWidthsLoaded[t->typeface][t->style])
htmlLoadFontWidths(t->typeface, t->style);

width = _htmlWidths[t->typeface][t->style][0x20] * max_width * 0.001f;
}
else if (t->data)
{
if (!_htmlWidthsLoaded[t->typeface][t->style])
htmlLoadFontWidths(t->typeface, t->style);

for (int_width = 0, ptr = t->data; *ptr != '\0'; ptr ++)
int_width += _htmlWidths[t->typeface][t->style][(int)*ptr & 255];

Expand Down
13 changes: 11 additions & 2 deletions htmldoc/iso8859.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -364,8 +364,17 @@ iso8859(uchar *name) /* I - Glyph name */

_htmlUnicode[newch] = ch;

// Reload font widths...
htmlLoadFontWidths();
// Update font widths...
for (int typeface = 0; typeface < TYPE_MAX; typeface ++)
{
for (int style = 0; style < STYLE_MAX; style ++)
{
if (_htmlWidthsLoaded[typeface][style])
{
_htmlWidths[typeface][style][newch] = _htmlWidthsAll[typeface][style][ch];
}
}
}

// Return the new character...
return ((uchar)newch);
Expand Down
3 changes: 3 additions & 0 deletions htmldoc/ps-pdf.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -9615,6 +9615,9 @@ get_width(uchar *s, /* I - String to scan */
if (s == NULL)
return (0.0);

if (!_htmlWidthsLoaded[typeface][style])
htmlLoadFontWidths(typeface, style);

for (width = 0, ptr = s; *ptr != '\0'; ptr ++)
width += _htmlWidths[typeface][style][*ptr];

Expand Down
26 changes: 26 additions & 0 deletions testsuite/benchmark.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/sh
#
# Script to benchmark the run time of HTMLDOC with various test files...
#
# Usage:
#
#   ./benchmark.sh [path-to-htmldoc]
#
# Writes one CSV line per input to stdout: "name,seconds", where seconds is
# the "real" (wall clock) time reported by "/usr/bin/time -p".  Each run
# writes its PDF output to t.pdf in the current directory.
#

if test $# -gt 0; then
  htmldoc="$1"
else
  htmldoc="../htmldoc/htmldoc"
fi

HTMLDOC_DATA=".."; export HTMLDOC_DATA

#
# benchmark LABEL ARGS...
#
# Run "$htmldoc --quiet ARGS..." under time(1) and print "LABEL,seconds".
# time(1) reports on stderr, hence the 2>&1.  Only a line whose first field
# is exactly "real" is used so that htmldoc messages that merely contain the
# word "real" cannot pollute the result.
#

benchmark() {
  label="$1"
  shift

  seconds=$(/usr/bin/time -p "$htmldoc" --quiet "$@" 2>&1 | awk '$1 == "real" { print $2 }')

  printf '%s,%s\n' "$label" "$seconds"
}

# HTML test files...
for file in *.html; do
  # An unmatched glob is left as the literal pattern; skip it.
  test -f "$file" || continue

  benchmark "$file" --webpage -f t.pdf "$file"
done

# Markdown test files (processed as UTF-8)...
for file in *.md; do
  test -f "$file" || continue

  benchmark "$file" --charset utf-8 --webpage -f t.pdf "$file"
done

# The HTMLDOC manual itself...
benchmark "htmldoc.book" --batch ../doc/htmldoc.book -f t.pdf

0 comments on commit 66d663f

Please sign in to comment.