This repository has been archived by the owner on Aug 25, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 58
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
75eba9f
commit 016fa49
Showing
8 changed files
with
204 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
'use strict'; | ||
|
||
/** | ||
* Module dependencies. | ||
*/ | ||
var utils = require('./utils'); | ||
var exec = require('child_process').exec; | ||
var fs = require('fs'); | ||
var tmpdir = require('os').tmpdir(); // let the os take care of removing zombie tmp files | ||
var uuid = require('node-uuid'); | ||
var path = require('path'); | ||
var glob = require('glob'); | ||
|
||
var Tesseract = {

  /**
   * Base paths (without extension) of output files handed to the Tesseract
   * binary that have not been cleaned up yet.
   * @type {Array}
   */
  tmpFiles: [],

  /**
   * options default options passed to Tesseract binary
   * @type {Object}
   */
  options: {
    'l': 'eng',
    'psm': 3,
    'config': null,
    'binary': 'tesseract'
  },

  /**
   * outputEncoding encoding used when reading the generated output file
   * @type {String}
   */
  outputEncoding: 'UTF-8',

  /**
   * Runs the Tesseract binary on an image and hands the content of the
   * generated output file to the callback.
   *
   * The `hocr` config is always appended to the command line. The output
   * file is looked up with one of the extensions html, hocr or txt, read
   * with `Tesseract.outputEncoding` and then removed.
   *
   * @param {String} image path of the image, inserted into the command line as is
   * @param {Object} options to pass to Tesseract binary (optional); merged
   *   over a copy of `Tesseract.options`. `options.env`, when set, is handed
   *   to `child_process.exec` as its options argument.
   * @param {Function} callback called as callback(err, data); data is null on error
   */
  process: function(image, options, callback) {

    if (typeof options === 'function') {
      callback = options;
      options = null;
    }

    // utils.merge writes into its first argument. Merge into a fresh copy so
    // that per-call options never leak into the shared defaults.
    options = utils.merge(utils.merge({}, Tesseract.options), options);

    // generate output file name
    var output = path.resolve(tmpdir, 'node-tesseract-' + uuid.v4());

    // add the tmp file to the list
    Tesseract.tmpFiles.push(output);

    // Stop tracking the output base path once there is nothing left to clean.
    function untrack() {
      var index = Tesseract.tmpFiles.indexOf(output);
      if (~index) Tesseract.tmpFiles.splice(index, 1);
    }

    // assemble tesseract command
    // NOTE(review): the command is run through a shell and `image`, `binary`,
    // `l`, `psm` and `config` are interpolated without escaping. Callers must
    // not pass untrusted values here.
    var command = [options.binary, image, output];

    if (options.l !== null) {
      command.push('-l ' + options.l);
    }

    if (options.psm !== null) {
      command.push('--psm ' + options.psm);
    }

    if (options.config !== null) {
      command.push(options.config);
    }

    command.push('hocr');

    command = command.join(' ');

    var opts = options.env || {};

    // Run the tesseract command
    exec(command, opts, function(err) {
      if (err) {
        // Something went wrong executing the assembled command. The output
        // stays tracked so the exit-time cleanup can still remove leftovers.
        callback(err, null);
        return;
      }

      // Find one of the three possible extension
      glob(output + '.+(html|hocr|txt)', function(err, files) {
        if (err) {
          callback(err, null);
          return;
        }

        // Without this guard fs.readFile(undefined, ...) would throw from
        // inside this callback instead of reporting the problem.
        if (!files || files.length === 0) {
          untrack();
          callback(new Error('Tesseract produced no output file for ' + output), null);
          return;
        }

        fs.readFile(files[0], Tesseract.outputEncoding, function(err, data) {
          if (err) {
            callback(err, null);
            return;
          }

          untrack();

          // Best-effort removal. A callback is mandatory for fs.unlink on
          // current Node versions; a failure here must not fail the OCR call.
          fs.unlink(files[0], function() {});

          callback(null, data);
        });
      });
    }); // end exec

  }

};
|
||
/**
 * Synchronously removes the output files that are still tracked in
 * `Tesseract.tmpFiles` and empties that list.
 *
 * Every tracked entry is a base path without extension, so each of the
 * extensions html, hocr and txt is tried. Removal is best-effort: a file
 * that does not exist or cannot be deleted is skipped silently, because
 * this function runs from process exit handlers and must not throw.
 */
function gc() {
  var extensions = ['.html', '.hocr', '.txt'];

  for (var i = Tesseract.tmpFiles.length - 1; i >= 0; i--) {
    for (var j = 0; j < extensions.length; j++) {
      try {
        fs.unlinkSync(Tesseract.tmpFiles[i] + extensions[j]);
      } catch (err) {
        // deliberately ignored, see function comment
      }
    }

    // Iterating backwards keeps the remaining indexes valid after removal.
    Tesseract.tmpFiles.splice(i, 1);
  }
}
|
||
// Running Node version as [major, minor, patch] integers.
var version = process.versions.node.split('.').map(function(part) {
  return parseInt(part, 10);
});

// True for Node releases older than 0.9.5.
var isBeforeNode095 = version[0] === 0 &&
  (version[1] < 9 || (version[1] === 9 && version[2] < 5));

if (isBeforeNode095) {
  // NOTE(review): presumably these old releases do not run the 'exit'
  // listener after an uncaught exception - confirm. Clean up here, then
  // rethrow so the error still surfaces.
  process.on('uncaughtException', function _uncaughtExceptionThrown(err) {
    gc();
    throw err;
  });
}

// Remove the remaining tmp files when the process exits.
process.on('exit', function _exit(code) {
  gc();
});
|
||
/** | ||
* Module exports. | ||
* Exposes `Tesseract.process` as `process`. The function refers to the | ||
* `Tesseract` object by name instead of `this`, so it works when detached. | ||
*/ | ||
module.exports.process = Tesseract.process; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
'use strict'; | ||
|
||
/**
 * Copies the own enumerable properties of `options` onto `defaults`.
 *
 * `defaults` is modified in place and returned; pass a fresh object as the
 * first argument to keep the original untouched. Properties whose value is
 * `undefined` are skipped, `null` values are copied. When `options` is
 * falsy or not an object, `defaults` is returned unchanged.
 *
 * @param {Object} defaults target object, replaced by {} when falsy
 * @param {Object} options values to copy onto the target
 * @return {Object} the target object
 */
module.exports.merge = function (defaults, options) {
  var target = defaults || {};

  if (!options || typeof options !== 'object') {
    return target;
  }

  Object.keys(options).forEach(function (key) {
    var value = options[key];
    if (value !== undefined) {
      target[key] = value;
    }
  });

  return target;
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters