Skip to content

Commit

Permalink
several changes:
Browse files Browse the repository at this point in the history
- Uint8Array for transition table
- error propagation via inst_E callback
- optimized parse_params
- ~80% speedup
  • Loading branch information
jerch committed Nov 19, 2015
1 parent b6f94ac commit 0651488
Show file tree
Hide file tree
Showing 6 changed files with 142 additions and 36 deletions.
13 changes: 10 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ Methods a terminal should implement:
* inst_P(data) *dcs put*
* inst_U() *dcs unhook*

There is a new `inst_E(e)` callback to track internal parsing errors, with `e` containing all internal
parser states at the time of the error. Additionally, the parser will stop immediately if you return a truthy value
from this callback (probably leaving it in a broken state - use `.reset` to fix the parser after investigation).

**NOTE:** If the terminal object doesn't provide the needed methods the parser
will inject dummy methods to keep working.

Expand Down Expand Up @@ -53,7 +57,10 @@ parser.parse('\x1bP0!u%5\x1b\'');
```
For a more complex terminal see [node-ansiterminal](https://github.com/netzkolchose/node-ansiterminal).

## Known Issues

* DEL (0x7f) is not handled at all at the moment (basically making the parser only up to VT220 compatible).
* No error propagation, all errors will silently reset the parser and continue with the next character.
## Parser Throughput

With noop terminal functions the parser has a throughput of ~41 MB/s
for normal terminal stuff like `ls -R /usr/lib` on my computer.
For expensive tasks with many csi escape sequences the throughput drops to ~18 MB/s.
That is 50-70% of the speed of a similar parser written in C (~65 MB/s and ~37 MB/s with same test data).
79 changes: 51 additions & 28 deletions dist/ansiparser.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
* @param {number=} next - next state
*/
function add(table, inp, state, action, next) {
    // The table is a flat typed array: the slot index packs the state into the
    // high bits and the character code into the low 8 bits; the stored byte packs
    // the action into the high nibble and the next state into the low nibble.
    // A missing `next` keeps the parser in the current state.
    // (The former nested write `table[state][inp] = [...]` is gone: on a flat
    // table `table[state]` is a plain number, so that write is a no-op in sloppy
    // mode and a TypeError in strict mode.)
    table[state<<8|inp] = ((action | 0) << 4) | ((next === undefined) ? state : next);
}

/**
Expand All @@ -51,14 +51,15 @@

/**
* create the standard transition table - used by all parser instances
* [state][character code] --> [action][next state]
*
* table[state << 8 | character code] = action << 4 | next state
*
* - states are numbers from 0 to 13
* - control character codes defined from 0 to 159 (C0 and C1)
* - actions are numbers from 0 to 14
* - any higher character than 159 is handled by the 'error' action
*
* states replacement:
* state replacements (14 states):
* 'GROUND' -> 0
* 'ESCAPE' -> 1
* 'ESCAPE_INTERMEDIATE' -> 2
Expand All @@ -74,7 +75,7 @@
* 'DCS_INTERMEDIATE' -> 12
* 'DCS_PASSTHROUGH' -> 13
*
* actions replacement:
* action replacements (15 actions):
* 'no action' -> 0
* 'error' -> 1
* 'print' -> 2
Expand All @@ -92,15 +93,14 @@
* 'dcs_unhook' -> 14
*/
var TRANSITION_TABLE = (function() {
var table = [];
//var table = [];
var table = new Uint8Array(4095);

// table with default transition [any][any] --> [error, GROUND]
// table with default transition [any] --> [error, GROUND]
for (var state=0; state<14; ++state) {
var chars = [];
for (var code=0; code<160; ++code) {
chars.push([1, 0]);
table[state<<8|code] = 16;
}
table.push(chars);
}

// apply transitions
Expand Down Expand Up @@ -212,10 +212,12 @@
*/
function parse_params(params) {
    // params are separated by ';'
    // 16 integer params max allowed - anything beyond that is dropped
    var parts = params.split(';').slice(0, 16);
    for (var i = 0; i < parts.length; ++i) {
        // empty defaults to 0, everything else is read as a base 10 integer
        parts[i] = parts[i] ? parseInt(parts[i], 10) : 0;
    }
    return parts;
}


Expand All @@ -237,7 +239,7 @@
// back reference to terminal
this.term = terminal || {};
var instructions = ['inst_p', 'inst_o', 'inst_x', 'inst_c',
'inst_e', 'inst_H', 'inst_P', 'inst_U'];
'inst_e', 'inst_H', 'inst_P', 'inst_U', 'inst_E'];
for (var i=0; i<instructions.length; ++i)
if (!(instructions[i] in this.term))
this.term[instructions[i]] = function() {};
Expand All @@ -258,7 +260,7 @@
* @param {string} s
*/
AnsiParser.prototype.parse = function(s) {
var c, code, transition, action, next_state;
var c, code, transition, error = false;
var current_state = this.current_state;

// local buffers
Expand All @@ -269,13 +271,16 @@
var params = this.params;

// process input string
for (var i=0; i< s.length; ++i) {
for (var i=0; i<s.length; ++i) {
c = s.charAt(i);
code = c.charCodeAt(0);
transition = TRANSITION_TABLE[current_state][code] || [1, 0];
action = transition[0];
next_state = transition[1];
switch (action) {
if (code < 0xa0) {
transition = TRANSITION_TABLE[current_state<<8|code];
}
else {
transition = 16;
}
switch (transition >> 4) {
case 0: // no action
break;
case 1: // error
Expand All @@ -288,19 +293,37 @@
break;
case 8: // OSC_STRING -> add char to osc string
osc += c;
next_state = 8;
transition |= 8;
break;
case 6: // CSI_IGNORE -> ignore char
next_state = 6;
transition |= 6;
break;
case 11: // DCS_IGNORE -> ignore char
next_state = 11;
transition |= 11;
break;
case 13: // DCS_PASSTHROUGH -> add char to dcs
dcs += c;
next_state = 13;
transition |= 13;
break;
default:
error = true;
}
} else {
error = true;
}
if (error) {
if (this.term.inst_E(
{
pos: i, // position in parse string
character: c, // wrong character
state: current_state, // in state
print: printed, // print buffer
dcs: dcs, // dcs buffer
osc: osc, // osc buffer
collect: collected, // collect buffer
params: params // params buffer
})) {return;}
error = false;
}
break;
case 2: // print
Expand Down Expand Up @@ -346,7 +369,7 @@
if (osc && code!==0x18 && code!==0x1a)
this.term.inst_o(osc);
if (code === 0x1b)
next_state = 1;
transition |= 1;
osc = '';
params = '';
collected = '';
Expand All @@ -364,21 +387,21 @@
}
this.term.inst_U();
if (code === 0x1b)
next_state = 1;
transition |= 1;
osc = '';
params = '';
collected = '';
dcs = '';
break;
}
current_state = next_state;
current_state = transition & 15;
}

// push leftover pushable buffers to terminal
if (!current_state && printed) {
this.term.inst_p(printed);
this.term.inst_p(printed);
} else if (current_state===13 && dcs) {
this.term.inst_P(dcs);
this.term.inst_P(dcs);
}

// save non pushable buffers
Expand Down
2 changes: 1 addition & 1 deletion dist/ansiparser.min.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
{
"name": "node-ansiparser",
"version": "2.0.2",
"version": "2.1.0",
"description": "A parser for ANSI escape codes.",
"main": "dist/ansiparser.js",
"keywords": [
"ansi",
"parser",
"terminal"
"terminal",
"escape sequence"
],
"author": "Joerg Breitbart <j.breitbart@netzkolchose.de>",
"license": "MIT",
Expand Down
20 changes: 20 additions & 0 deletions test/tests.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<!DOCTYPE html>
<html>
<!-- Browser runner for the test suite: loads mocha, chai, the parser build and tests.js, then runs mocha. -->
<head>
<meta charset="utf-8">
<title>Ansiparser Tests</title>
<link rel="stylesheet" media="all" href="../node_modules/mocha/mocha.css">
</head>
<body>

<!-- presumably the element mocha's HTML reporter renders into; verify against the mocha docs -->
<div id="mocha"><p><a href=".">Index</a></p></div>
<!-- NOTE(review): nothing in this page references #messages or #fixtures; confirm whether tests.js needs them -->
<div id="messages"></div>
<div id="fixtures"></div>
<!-- Script order matters: the libraries and the parser build are loaded before tests.js, which uses them. -->
<script src="../node_modules/mocha/mocha.js"></script>
<script src="../node_modules/chai/chai.js"></script>
<script src="../dist/ansiparser.js"></script>
<!-- select the bdd interface before the test file is loaded -->
<script>mocha.setup('bdd')</script>
<script src="tests.js"></script>
<!-- start the run once everything above has been loaded -->
<script>mocha.run();</script>
</body>
</html>
59 changes: 57 additions & 2 deletions test/tests.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
// Load the dependencies through CommonJS only when a `module` object exists
// (i.e. under node). The browser runner (test/tests.html) loads chai.js and
// ansiparser.js via <script> tags instead, where `chai` and `AnsiParser` are
// expected to be available as globals - an unguarded require() would throw there.
if (typeof module !== 'undefined' && module.exports) {
    var chai = require('chai');
    var AnsiParser = require('../dist/ansiparser.js');
}

function r(a, b) {
var c = b - a,
Expand Down Expand Up @@ -957,4 +959,57 @@ describe('coverage tests', function() {
parser.reset();
test_terminal.clear();
});
});

// Fixture 1: a terminal whose inst_E handler records the error object and
// returns nothing, so the parser keeps going after reporting the error.
// Everything except inst_E is inherited from test_terminal via the prototype.
var ErrorTerminal1 = function(){};
ErrorTerminal1.prototype = test_terminal;
var err_terminal1 = new ErrorTerminal1();
err_terminal1.inst_E = function(e) {
// `calls` is not set on this object here - presumably it is inherited from
// test_terminal (verify against its definition)
this.calls.push(['error', e]);
};
var err_parser1 = new AnsiParser(err_terminal1);

// Fixture 2: same wiring as fixture 1, but inst_E returns true, which makes
// the parser stop at the offending character.
var ErrorTerminal2 = function(){};
ErrorTerminal2.prototype = test_terminal;
var err_terminal2 = new ErrorTerminal2();
err_terminal2.inst_E = function(e) {
this.calls.push(['error', e]);
return true; // --> abort parsing
};
var err_parser2 = new AnsiParser(err_terminal2);

// Both tests feed a CSI sequence whose parameter part is interrupted by a
// character outside the transition table ('€') and check the object handed
// to inst_E.
describe('error tests', function() {
    // inst_E returns nothing here --> the error is reported and the rest of the
    // input is still parsed and printed
    it('CSI_PARAM unicode error - inst_E output w/o abort', function () {
        err_parser1.parse('\x1b[<31;5€normal print');
        err_terminal1.compare([
            ['error', {
                pos: 7,
                character: '€',
                state: 4,
                print: '',
                dcs: '',
                osc: '',
                collect: '<',
                params: '31;5'}],
            ['print', 'normal print']
        ]);
        // reset the parser that was actually exercised by this test
        err_parser1.reset();
        // the error terminals inherit from test_terminal; presumably the recorded
        // calls live there, so they are cleared on the prototype object
        // (verify against test_terminal's definition)
        test_terminal.clear();
    });
    // inst_E returns true here --> parsing stops at the offending character,
    // nothing after it reaches the terminal
    it('CSI_PARAM unicode error - inst_E output with abort', function () {
        err_parser2.parse('\x1b[<31;5€no print');
        err_terminal2.compare([
            ['error', {
                pos: 7,
                character: '€',
                state: 4,
                print: '',
                dcs: '',
                osc: '',
                collect: '<',
                params: '31;5'}]
        ]);
        // the aborted parser is left mid-sequence - reset it, not the unrelated
        // `parser` instance
        err_parser2.reset();
        test_terminal.clear();
    });
});

0 comments on commit 0651488

Please sign in to comment.