Skip to content

Commit

Permalink
Merge pull request #7550 from Snuffleupagus/Type1-toUnicode-builtInEn…
Browse files Browse the repository at this point in the history
…coding-fallback

For embedded Type1 fonts without included `ToUnicode`/`Encoding` data, attempt to improve text selection by using the `builtInEncoding` to amend the `toUnicode` map (issue 6901, issue 7182, issue 7217, bug 917796, bug 1242142)
  • Loading branch information
Snuffleupagus committed Sep 16, 2016
2 parents 834a7ff + 325f7af commit 4acd31f
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 8 deletions.
5 changes: 4 additions & 1 deletion src/core/evaluator.js
Original file line number Diff line number Diff line change
Expand Up @@ -1757,6 +1757,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {

properties.differences = differences;
properties.baseEncodingName = baseEncodingName;
properties.hasEncoding = !!baseEncodingName || differences.length > 0;
properties.dict = dict;
return toUnicodePromise.then(function(toUnicode) {
properties.toUnicode = toUnicode;
Expand All @@ -1774,8 +1775,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
* {ToUnicodeMap|IdentityToUnicodeMap} object.
*/
buildToUnicode: function PartialEvaluator_buildToUnicode(properties) {
properties.hasIncludedToUnicodeMap =
!!properties.toUnicode && properties.toUnicode.length > 0;
// Section 9.10.2 Mapping Character Codes to Unicode Values
if (properties.toUnicode && properties.toUnicode.length !== 0) {
if (properties.hasIncludedToUnicodeMap) {
return Promise.resolve(properties.toUnicode);
}
// According to the spec if the font is a simple font we should only map
Expand Down
53 changes: 46 additions & 7 deletions src/core/fonts.js
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,30 @@ function adjustWidths(properties) {
properties.defaultWidth *= scale;
}

/**
 * Amends `properties.toUnicode` with entries derived from the glyph names of
 * the font's built-in encoding.
 *
 * Nothing is changed when the font dictionary already supplied `ToUnicode`
 * or `Encoding` data, when `builtInEncoding` is the very same object as
 * `properties.defaultEncoding`, or when `properties.toUnicode` is an
 * `IdentityToUnicodeMap`.
 *
 * @param {Object} properties - Font properties; reads `hasIncludedToUnicodeMap`,
 *   `hasEncoding`, `defaultEncoding` and `toUnicode`, and calls
 *   `toUnicode.amend(...)`.
 * @param {Object} builtInEncoding - Mapping from character code to glyph name.
 */
function adjustToUnicode(properties, builtInEncoding) {
  // The font dictionary has a `ToUnicode` entry or an `Encoding` entry.
  if (properties.hasIncludedToUnicodeMap || properties.hasEncoding) {
    return;
  }
  // No point in trying to adjust `toUnicode` if the encodings match, and an
  // identity map is left untouched.
  if (builtInEncoding === properties.defaultEncoding ||
      properties.toUnicode instanceof IdentityToUnicodeMap) {
    return;
  }

  var glyphsUnicodeMap = getGlyphsUnicode();
  var amendments = [];
  for (var code in builtInEncoding) {
    var unicodeValue = getUnicodeForGlyph(builtInEncoding[code],
                                          glyphsUnicodeMap);
    if (unicodeValue === -1) {
      continue; // No Unicode value is known for this glyph name.
    }
    amendments[code] = String.fromCharCode(unicodeValue);
  }
  properties.toUnicode.amend(amendments);
}

function getFontType(type, subtype) {
switch (type) {
case 'Type1':
Expand Down Expand Up @@ -261,7 +285,13 @@ var ToUnicodeMap = (function ToUnicodeMapClosure() {

charCodeOf: function(v) {
return this._map.indexOf(v);
}
},

amend: function (map) {
for (var charCode in map) {
this._map[charCode] = map[charCode];
}
},
};

return ToUnicodeMap;
Expand Down Expand Up @@ -297,7 +327,11 @@ var IdentityToUnicodeMap = (function IdentityToUnicodeMapClosure() {

charCodeOf: function (v) {
return (isInt(v) && v >= this.firstChar && v <= this.lastChar) ? v : -1;
}
},

// Amending is not supported on this map type: the `map` argument is never
// read and the call is reported through `error(...)`.
// NOTE(review): `error` is defined outside this view; presumably it throws,
// confirm against the helper's definition.
amend: function (map) {
error('Should not call amend()');
},
};

return IdentityToUnicodeMap;
Expand Down Expand Up @@ -765,6 +799,7 @@ var Font = (function FontClosure() {
this.fontMatrix = properties.fontMatrix;
this.widths = properties.widths;
this.defaultWidth = properties.defaultWidth;
this.toUnicode = properties.toUnicode;
this.encoding = properties.baseEncoding;
this.seacMap = properties.seacMap;

Expand Down Expand Up @@ -2386,10 +2421,8 @@ var Font = (function FontClosure() {
} else {
// Most of the following logic in this code branch is based on the
// 9.6.6.4 of the PDF spec.
var hasEncoding =
properties.differences.length > 0 || !!properties.baseEncodingName;
var cmapTable =
readCmapTable(tables['cmap'], font, this.isSymbolicFont, hasEncoding);
var cmapTable = readCmapTable(tables['cmap'], font, this.isSymbolicFont,
properties.hasEncoding);
var cmapPlatformId = cmapTable.platformId;
var cmapEncodingId = cmapTable.encodingId;
var cmapMappings = cmapTable.mappings;
Expand All @@ -2398,7 +2431,7 @@ var Font = (function FontClosure() {
// The spec seems to imply that if the font is symbolic the encoding
// should be ignored, this doesn't appear to work for 'preistabelle.pdf'
// where the font is symbolic and it has an encoding.
if (hasEncoding &&
if (properties.hasEncoding &&
(cmapPlatformId === 3 && cmapEncodingId === 1 ||
cmapPlatformId === 1 && cmapEncodingId === 0) ||
(cmapPlatformId === -1 && cmapEncodingId === -1 && // Temporary hack
Expand Down Expand Up @@ -2562,6 +2595,12 @@ var Font = (function FontClosure() {
// TODO: Check the charstring widths to determine this.
properties.fixedPitch = false;

if (properties.builtInEncoding) {
// For Type1 fonts that do not include either `ToUnicode` or `Encoding`
// data, attempt to use the `builtInEncoding` to improve text selection.
adjustToUnicode(properties, properties.builtInEncoding);
}

var mapping = font.getGlyphMapping(properties);
var newMapping = adjustMapping(mapping, properties);
this.toFontChar = newMapping.toFontChar;
Expand Down
1 change: 1 addition & 0 deletions test/pdfs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
!issue5808.pdf
!issue6204.pdf
!issue6782.pdf
!issue6901.pdf
!issue6961.pdf
!issue6962.pdf
!issue7020.pdf
Expand Down
Binary file added test/pdfs/issue6901.pdf
Binary file not shown.
14 changes: 14 additions & 0 deletions test/test_manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -1220,6 +1220,20 @@
"link": false,
"type": "text"
},
{ "id": "issue6901-eq",
"file": "pdfs/issue6901.pdf",
"md5": "1a0604b1a7a3aaf2162b425a9a84230b",
"rounds": 1,
"link": false,
"type": "eq"
},
{ "id": "issue6901-text",
"file": "pdfs/issue6901.pdf",
"md5": "1a0604b1a7a3aaf2162b425a9a84230b",
"rounds": 1,
"link": false,
"type": "text"
},
{ "id": "issue6962",
"file": "pdfs/issue6962.pdf",
"md5": "d40e871ecca68baf93114bd28c782148",
Expand Down

0 comments on commit 4acd31f

Please sign in to comment.