Skip to content
This repository has been archived by the owner on Aug 25, 2022. It is now read-only.

Commit

Permalink
tunning
Browse files Browse the repository at this point in the history
  • Loading branch information
SamuelZhaoY committed Dec 12, 2018
1 parent 75eba9f commit 016fa49
Show file tree
Hide file tree
Showing 8 changed files with 204 additions and 27 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ nosmoke
| :---: | :---: | :---: |


This project follows the git-contributor [spec](https://github.com/xudafeng/git-contributor), auto updated at `Sun Dec 09 2018 14:52:55 GMT+0800`.
This project follows the git-contributor [spec](https://github.com/xudafeng/git-contributor), auto updated at `Sun Dec 09 2018 21:26:54 GMT+0800`.

<!-- GITCONTRIBUTOR_END -->

Expand Down
4 changes: 3 additions & 1 deletion lib/common/helper.js
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ _.addHookAPIs = function(hook, crawler) {
} else {

// trigger input without knowing for element
await root.wdclient.send('/wd/hub/session/' + crawler.sessionId + '/keys', 'post', {'value': data.value}, null);
await root.wdclient.send('/wd/hub/session/' + crawler.sessionId + '/keys', 'post', {'value': [data.value]}, null);

}
};
Expand Down Expand Up @@ -167,6 +167,8 @@ _.addHookAPIs = function(hook, crawler) {
}
]}, null);
};

hook.sleep = _.sleep;
};

module.exports = _;
2 changes: 1 addition & 1 deletion lib/crawler/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ function NSCrawlerConfig() {

// - phase 2 - OCR
this.strategy = 'source';
this.depth = 20;
this.depth = 100;
this.duration = 1800;
this.triggers = [];
this.exclude = [];
Expand Down
15 changes: 9 additions & 6 deletions lib/crawler/ocr-impl/models.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
const ReporterTemplate = require('../reporterTemplate');
const _ = require('../../common/helper');
let root = require('window-or-global');
const utf8 = require('utf8');

/* Test suite control */
root.updatePassedTestsCount = function () {
Expand Down Expand Up @@ -40,10 +41,11 @@ function NSAppCrawlingTreeNodeAction() {
this.interval = 0; // test duration interval
this.source = {}; // - not in use - for source type strategy only
this.name = ''; // name
this.repeatable = false;
}

NSAppCrawlingTreeNodeAction.prototype.title = function() {
return `${this.name}-${JSON.stringify(this.location)}`;
return utf8.encode(`${this.location.origin.x}-${this.location.origin.y}`);
};

NSAppCrawlingTreeNodeAction.prototype.markActionStart = function (crawler) {
Expand All @@ -62,7 +64,7 @@ NSAppCrawlingTreeNodeAction.prototype.markActionFinish = function (crawler, file
this.bindedTest.pass = true;
this.bindedTest.state = 'passed';
this.bindedTest.duration = crawler.currentAction.interval;
this.bindedTest.code = JSON.stringify(crawler.currentAction, null, 2);
this.bindedTest.code = '';
}
};

Expand Down Expand Up @@ -96,7 +98,8 @@ NSAppCrawlingTreeNode.prototype.produceNodeActions = function(rawElements) {
let shallowCopy = Object.assign({}, rawElement);
action.location = shallowCopy.location;
action.input = shallowCopy.input;
action.name = shallowCopy.name;
action.name = utf8.encode(shallowCopy.name || '');
action.repeatable = shallowCopy.repeatable;
actions.push(action);
}

Expand Down Expand Up @@ -125,13 +128,13 @@ NSAppCrawlingTreeNode.prototype.checkAndInitReportDataIfNeeded = function (crawl
// empty case
if (!this.reportSuite) {
this.reportSuite = ReporterTemplate.suite();
this.reportSuite.title = 'View: ' + this.abbreviatedDigest();
this.reportSuite.title = 'View: ';
this.reportSuite.uuid = `${_.uuid()}`;

let shell = ReporterTemplate.suite();
shell.uuid = _.uuid();
shell.suites.push(this.reportSuite);
shell.title = 'View: ' + this.abbreviatedDigest();
shell.title = 'View: ';
this.reportSuite.context = this.fileName || null;
root.mockData.suites.suites.push(shell);
}
Expand All @@ -144,7 +147,7 @@ NSAppCrawlingTreeNode.prototype.checkAndInitReportDataIfNeeded = function (crawl
for (let i = 0; i < this.actions.length; i++) {
let test = ReporterTemplate.test();
test.title = this.actions[i].title();
test.fullTitle = `View: ${this.abbreviatedDigest()}//${this.actions[i].title()}`;
test.fullTitle = 'View: ';
this.actions[i].bindedTest = test;
this.reportSuite.tests.push(test);
this.reportSuite.totalPasses++;
Expand Down
39 changes: 23 additions & 16 deletions lib/crawler/ocr-impl/ocr-crawler.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@ configure({
});

let maxTextSpacingInDP = 30;
let estimatedStatusBarHeight = 40;
let estimatedStatusAndNavBarHeight = 200;
let DPTOPXRatio = 1;

var HOCR = require('./hocr-to-json');
var tesseract = require('node-tesseract');
var tesseract = require('./tesseract');

var options = {
l: 'chi_sim',
l: 'chi_sim+eng',
binary: '/usr/local/bin/tesseract',
psm: 3,
config: '--oem 1'
Expand Down Expand Up @@ -239,8 +239,11 @@ NSCrawler.prototype.performAction = async function(actions) {
let action = null;
for (let i = 0; i < actions.length; i++) {
let candidate = actions[i];
if (!candidate.isTriggered) {
candidate.isTriggered = true;
if (!candidate.isTriggered || candidate.repeatable) {

if (!candidate.repeatable) {
candidate.isTriggered = true;
}

/** log and only log in the current progress */
this.currentAction = candidate;
Expand Down Expand Up @@ -290,17 +293,22 @@ NSCrawler.prototype.recursiveFilter = function (source, matches, exclusive) {
let normalAction = [];

// check if current source contains head bar
if (source.lines[0] && source.lines[0].line[0].bbox.y0 / DPTOPXRatio < estimatedStatusBarHeight) {
source.lines.shift();
let shift_items = 0;
for (const line of source.lines) {
if (line.line[0].bbox.y0 / DPTOPXRatio < estimatedStatusAndNavBarHeight) {
shift_items++;
}
}

source.lines = source.lines.slice(shift_items);

for (let paramIndex = 0; paramIndex < source.lines.length; paramIndex++) {

let param = source.lines[paramIndex].line;
for (let index = 0; index < param.length; index++) {

let currentLine = param[index];
let currentWord = {input: '', location: {origin: {x: 0, y: 0}, size: {w: 0, h: 0}}, name: ''};
let currentWord = {input: '', location: {origin: {x: 0, y: 0}, size: {w: 0, h: 0}}, name: '', repeatable: false};

// check regular action
for (let wordIndex in currentLine.words) {
Expand Down Expand Up @@ -348,14 +356,12 @@ NSCrawler.prototype.recursiveFilter = function (source, matches, exclusive) {
}
}

if (currentWord.location.origin.x > 0) {
if (!this.checkShouldBeExcluded(this.config.exclude, currentWord)) {
normalAction.push(currentWord);
this.checkContentMatch(matches, currentWord, prioritisedAction);
console.log(`filtering out action: ${JSON.stringify(currentWord)}`);
} else {
console.log(`action ignored: ${JSON.stringify(currentWord)}`);
}
if (!this.checkShouldBeExcluded(this.config.exclude, currentWord)) {
normalAction.push(currentWord);
this.checkContentMatch(matches, currentWord, prioritisedAction);
console.log(`filtering out action: ${JSON.stringify(currentWord)}`);
} else {
console.log(`action ignored: ${JSON.stringify(currentWord)}`);
}
}
}
Expand Down Expand Up @@ -384,6 +390,7 @@ NSCrawler.prototype.checkContentMatch = function (matches, word, prioritisedActi
// check prioritized action
for (let match in matches) {
if (word.name && matches[match] && matches[match].toLowerCase() === word.name.toLowerCase()) {
word.repeatable = true;
prioritisedAction.push(word);
console.log(`matching prioritized action: ${JSON.stringify(word)}`);
break;
Expand Down
142 changes: 142 additions & 0 deletions lib/crawler/ocr-impl/tesseract.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
'use strict';

/**
* Module dependencies.
*/
var utils = require('./utils');
var exec = require('child_process').exec;
var fs = require('fs');
var tmpdir = require('os').tmpdir(); // let the os take care of removing zombie tmp files
var uuid = require('node-uuid');
var path = require('path');
var glob = require('glob');

var Tesseract = {

  /**
   * Base paths (without extension) of output files that have been requested
   * but not yet read back; entries are removed once the result was read.
   * @type {Array.<String>}
   */
  tmpFiles: [],

  /**
   * options default options passed to Tesseract binary
   * @type {Object}
   */
  options: {
    'l': 'eng',
    'psm': 3,
    'config': null,
    'binary': 'tesseract'
  },

  /**
   * outputEncoding encoding used when reading the generated output file
   * @type {String}
   */
  outputEncoding: 'UTF-8',

  /**
   * Runs Tesseract binary with options
   *
   * Builds a command line of the form
   * `<binary> <image> <output> [-l <l>] [--psm <psm>] [<config>] hocr`,
   * executes it, reads the generated output file and hands its content to
   * `callback`.
   *
   * @param {String} image path of the image; inserted verbatim into the command line
   * @param {Object} options to pass to Tesseract binary (`l`, `psm`, `config`,
   *                 `binary`, `env`); may be omitted, in which case the second
   *                 argument is treated as the callback
   * @param {Function} callback called as `callback(err, null)` on failure or
   *                   `callback(null, data)` with the output file content
   */
  process: function(image, options, callback) {

    if (typeof options === 'function') {
      callback = options;
      options = null;
    }

    // Merge into a fresh copy of the defaults: merging straight into
    // Tesseract.options would make per-call options stick for every later call.
    options = utils.merge(utils.merge({}, Tesseract.options), options);

    // generate output file name
    var output = path.resolve(tmpdir, 'node-tesseract-' + uuid.v4());

    // add the tmp file to the list
    Tesseract.tmpFiles.push(output);

    // assemble tesseract command
    // NOTE(review): the command is a plain space-joined string handed to a
    // shell, so `image`, `binary` and `config` must come from trusted input
    // and must not contain unescaped spaces or shell metacharacters.
    var command = [options.binary, image, output];

    if (options.l !== null) {
      command.push('-l ' + options.l);
    }

    if (options.psm !== null) {
      command.push('--psm ' + options.psm);
    }

    if (options.config !== null) {
      command.push(options.config);
    }

    command.push('hocr');

    command = command.join(' ');

    // NOTE(review): `options.env` is passed as the whole exec options object,
    // not as its `env` field - confirm this is intended.
    var opts = options.env || {};

    // Run the tesseract command
    exec(command, opts, function(err) {
      if (err) {
        // Something went wrong executing the assembled command
        callback(err, null);
        return;
      }

      // Find one of the three possible extensions
      glob(output + '.+(html|hocr|txt)', function(err, files) {
        if (err) {
          callback(err, null);
          return;
        }

        // Without this guard a run that wrote no file would make
        // fs.readFile(undefined, ...) throw instead of reporting an error.
        if (!files || files.length === 0) {
          callback(new Error('tesseract produced no output file for ' + output), null);
          return;
        }

        fs.readFile(files[0], Tesseract.outputEncoding, function(err, data) {
          if (err) {
            callback(err, null);
            return;
          }

          var index = Tesseract.tmpFiles.indexOf(output);
          if (~index) Tesseract.tmpFiles.splice(index, 1);

          // fs.unlink requires a callback on current Node.js versions.
          // Removal is best-effort (the file lives in the OS tmp dir), so a
          // failure here must not turn a successful read into an error.
          fs.unlink(files[0], function() {});

          callback(null, data);
        });
      });
    }); // end exec

  }

};

/**
 * Best-effort cleanup of output files that were registered in
 * `Tesseract.tmpFiles` but never read back and removed.
 *
 * For every pending base path each extension that `Tesseract.process` looks
 * for (`.html`, `.hocr`, `.txt`) is unlinked synchronously, and the pending
 * list is emptied in place. Missing files are expected and ignored.
 */
function gc() {
  var extensions = ['.html', '.hocr', '.txt'];

  // splice() empties the shared array in place and hands back its entries
  var pending = Tesseract.tmpFiles.splice(0, Tesseract.tmpFiles.length);

  pending.forEach(function(base) {
    extensions.forEach(function(extension) {
      try {
        fs.unlinkSync(base + extension);
      } catch (err) {
        // Deliberately ignored: usually the file was never created or is
        // already gone, and cleanup must never throw.
      }
    });
  });
}

// Numeric [major, minor, patch] parts of the running Node.js version.
var nodeVersion = process.versions.node.split('.').map(function(part) {
  return parseInt(part, 10);
});

// True only for Node.js releases older than 0.9.5.
var isBeforeNode095 = nodeVersion[0] === 0 &&
  (nodeVersion[1] < 9 || (nodeVersion[1] === 9 && nodeVersion[2] < 5));

if (isBeforeNode095) {
  // On those releases clean up before re-throwing an uncaught exception.
  process.on('uncaughtException', function _uncaughtExceptionThrown(err) {
    gc();
    throw err;
  });
}

// clean up the tmp files when the process exits
process.on('exit', function _exit(code) {
  gc();
});

/**
 * Module exports: only the `process` function is public.
 */
module.exports.process = Tesseract.process;
21 changes: 21 additions & 0 deletions lib/crawler/ocr-impl/utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
'use strict';

/**
 * Copies every own enumerable property of `options` whose value is not
 * `undefined` onto `defaults` and returns `defaults`.
 *
 * `defaults` is modified in place and is itself the return value; pass a
 * fresh object if the original must stay untouched. A falsy `defaults` is
 * replaced by a new empty object. A falsy or non-object `options` leaves
 * `defaults` unchanged.
 *
 * @param {Object} defaults target object that receives the values
 * @param {Object} options values to copy over
 * @return {Object} the (possibly newly created) target object
 */
module.exports.merge = function (defaults, options) {
  var target = defaults || {};

  if (!options || typeof options !== 'object') {
    return target;
  }

  Object.keys(options).forEach(function (key) {
    var value = options[key];
    if (value !== undefined) {
      target[key] = value;
    }
  });

  return target;
};
6 changes: 4 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
"co": "^4.6.0",
"commander": "^2.9.0",
"fs-extra": "^7.0.0",
"glob": "^5.0.10",
"htmlparser": "^1.7.7",
"htmlparser2": "1.5.0",
"image-size": "^0.6.3",
"is-url": "^1.2.4",
"isomorphic-fetch": "^2.2.1",
Expand All @@ -34,13 +36,13 @@
"macaca-reporter": "^1.1.11",
"macaca-utils": "^1.0.1",
"node-fetch": "^1.7.2",
"node-tesseract": "^0.2.7",
"node-uuid": "^1.4.1",
"npm-update": "^1.0.2",
"object-assign": "^4.1.0",
"png.js": "^0.2.1",
"socket.io": "^2.0.3",
"htmlparser2": "1.5.0",
"string-similarity": "^2.0.0",
"utf8": "^3.0.0",
"window-or-global": "^1.0.1",
"xlogger": "^1.0.4",
"xml2map": "^1.0.2",
Expand Down

0 comments on commit 016fa49

Please sign in to comment.