Skip to content

Commit

Permalink
Initial commit, basic html tests
Browse files Browse the repository at this point in the history
  • Loading branch information
mixu committed Apr 6, 2013
0 parents commit 5b44ef5
Show file tree
Hide file tree
Showing 5 changed files with 166 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/node_modules/
49 changes: 49 additions & 0 deletions index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Tags that are serialized without a closing tag (HTML void elements plus the
// "?xml" pseudo-tag). Keys are lower-case tag names; the values are unused
// truthy markers.
var emptyTags = {
  "area": 1,
  "base": 1,
  "basefont": 1,
  "br": 1,
  "col": 1,
  "frame": 1,
  "hr": 1,
  "img": 1,
  "input": 1,
  "isindex": 1,
  "link": 1,
  "meta": 1,
  "param": 1,
  "embed": 1,
  "?xml": 1
};

// Own-property test, so that tag names which collide with Object.prototype
// members (e.g. "constructor", "toString") are not mistaken for empty tags.
var hasOwn = Object.prototype.hasOwnProperty;

// Returns true when `name` is listed in emptyTags. The comparison is
// case-insensitive because HTML tag names are (so "BR" is treated like "br").
function isEmptyTag(name) {
  return hasOwn.call(emptyTags, String(name).toLowerCase());
}

// Serializes an attribs object as a run of ' key="value"' pairs.
// Returns '' when the node has no attribs object.
function renderAttribs(attribs) {
  if(!attribs) {
    return '';
  }
  return Object.keys(attribs).reduce(function(prev, key) {
    // Values are wrapped in double quotes, so a literal double quote inside a
    // value must be escaped or it would end the attribute early. Nothing else
    // is escaped: values are otherwise emitted exactly as stored on the node.
    var value = String(attribs[key]).replace(/"/g, '&quot;');
    return prev + ' ' + key + '="' + value + '"';
  }, '');
}

/**
 * Converts a parsed DOM node, or an array of nodes, back into an HTML string.
 *
 * Nodes are plain objects with a `type` property:
 *  - 'text'      -> item.data, unchanged
 *  - 'comment'   -> <!--data-->
 *  - 'directive' -> <data>  (assumes data holds the raw directive body, e.g.
 *                   "!DOCTYPE html" -- TODO confirm against the parser in use)
 *  - 'tag' / 'script' / 'style' -> <name attribs>children</name>; the closing
 *                   tag is omitted for names listed in emptyTags
 *  - 'cdata'     -> <![CDATA[...]]> around the node's children, or around
 *                   item.data when there are no children
 * Any other type, and null/undefined items, serialize to ''.
 *
 * @param {Object|Array} item - a node, or an array of nodes (handled recursively)
 * @returns {string} the serialized HTML
 */
function html(item) {
  // apply recursively to arrays
  if(Array.isArray(item)) {
    return item.map(html).join('');
  }
  // tolerate holes so a single bad entry does not throw
  if(item == null) {
    return '';
  }
  switch(item.type) {
    case 'text':
      return item.data;
    case 'directive':
      return '<' + item.data + '>';
    case 'comment':
      return '<!--' + item.data + '-->';
    case 'style':
    case 'script':
    case 'tag':
      return '<' + item.name + renderAttribs(item.attribs) + '>' +
        (item.children ? html(item.children) : '') +
        (isEmptyTag(item.name) ? '' : '</' + item.name + '>');
    case 'cdata':
      return '<![CDATA[' +
        (item.children ? html(item.children) : (item.data == null ? '' : item.data)) +
        ']]>';
    default:
      // unknown node types contribute nothing (and never `undefined`)
      return '';
  }
}

// The module's sole export is the html() serializer function itself.
module.exports = html;
26 changes: 26 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"name": "htmlparser-to-html",
"version": "0.0.0",
"description": "Converts the JSON that the htmlparser/htmlparser2 package produces back to HTML.",
"main": "index.js",
"scripts": {
"test": "node test.js"
},
"repository": {
"type": "git",
"url": "git://github.com/mixu/htmlparser-to-html.git"
},
"keywords": [
"html",
"parser",
"htmlparser",
"htmlparser2"
],
"author": "Mikito Takada <mikito.takada@gmail.com>",
"license": "BSD",
"readmeFilename": "readme.md",
"devDependencies": {
"mocha": "~1.9.0",
"htmlparser": "~1.7.6"
}
}
34 changes: 34 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# htmlparser-to-html

Converts the JSON that [htmlparser](https://npmjs.org/package/htmlparser) (and probably [htmlparser2](https://npmjs.org/package/htmlparser2)) produces back to HTML.

Useful when you parse HTML, transform the resulting tree, and then need to serialize it back to HTML.

Tests are based on reversing the parser tests in htmlparser, so they are quite comprehensive.

## Usage

```js
var html = require('htmlparser-to-html');

console.log(html([
    { type: 'tag'
    , name: 'html'
    , children:
       [ { type: 'tag'
         , name: 'title'
         , children: [ { data: 'The Title', type: 'text' } ]
         }
       , { type: 'tag'
         , name: 'body'
         , children: [ { data: 'Hello world', type: 'text' } ]
         }
       ]
    }
  ]));

// outputs: <html><title>The Title</title><body>Hello world</body></html>
```

Of course, you probably want to generate the array from htmlparser.



56 changes: 56 additions & 0 deletions test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
var util = require('util'),
assert = require('assert'),

html = require('./index.js'),
htmlparser = require('htmlparser');

// Round-trip test cases, keyed by test name. Tests are generated from this
// table further down in this file.
//
// A plain string value is both the input and the expected output: the test
// checks that html(htmlparser(original)) == original.
// A value may instead be a two-element array [original, expected] for inputs
// whose serialized output is expected to differ from the input (none of the
// current cases use this form).
var cases = {
"Basic test": "<html><title>The Title</title><body>Hello world</body></html>",
"Single Tag 1": "<br>text",
"Unescaped chars in script": "<head><script language=\"Javascript\">var foo = \"<bar>\"; alert(2 > foo); var baz = 10 << 2; var zip = 10 >> 1; var yap = \"<<>>>><<\";</script></head>",
"Special char in comment": "<head><!-- commented out tags <title>Test</title>--></head>",
"Script source in comment": "<script><!--var foo = 1;--></script>",
"Unescaped chars in style": "<style type=\"text/css\">\n body > p\n { font-weight: bold; }</style>",
"Singular attribute": "<option value=\"foo\" selected=\"selected\"></option>",
"Text outside tags": "Line one\n<br>\nline two",
"Only text": "this is the text",
"Comment within text": "this is <!-- the comment --> the text",
"Comment within text within script": "<script>this is <!-- the comment --> the text</script>",
"XML Namespace": "<ns:tag>text</ns:tag>",
};

/**
 * Parses an HTML string with htmlparser and returns the resulting DOM.
 *
 * The handler is created with verbose output off and ignoreWhitespace on.
 * A parse error reported to the handler callback is rethrown.
 *
 * @param {string} html - markup to parse
 * @returns {*} the `dom` property of the handler after parseComplete() returns
 */
function parse(html) {
  var handlerOptions = { verbose: false, ignoreWhitespace: true };
  var onParsed = function(error, dom) {
    if(error) throw error;
  };
  var handler = new htmlparser.DefaultHandler(onParsed, handlerOptions);
  // The API is callback-based, but the result is read from the handler
  // right after parseComplete() returns.
  new htmlparser.Parser(handler).parseComplete(html);
  return handler.dom;
}

// Generates one test per entry in `cases` and registers it on `exports`
// (the runner below invokes mocha with the "exports" UI).
Object.keys(cases).forEach(function(testName) {
  var expected, original;
  // An array entry is [original, expected]: used when the output will differ
  // from the input due to the permissive parsing. A plain string entry is
  // expected to round-trip unchanged.
  if(Array.isArray(cases[testName])) {
    original = cases[testName][0];
    expected = cases[testName][1];
  } else {
    expected = original = cases[testName];
  }
  exports[testName] = function() {
    // Debugging aid: uncomment to dump the parsed tree.
    // console.log(util.inspect(parse(original), false, 10, true));

    // Compare against `expected`, not `original`; otherwise the
    // [original, expected] form above could never pass.
    assert.equal(html(parse(original)), expected);
  };
});

// if this module is the script being run, then run the tests:
// re-invoke this file through the locally installed mocha using the
// "exports" UI.
if (module === require.main) {
  // Resolve mocha relative to this file rather than the current working
  // directory, so the tests can be started from anywhere.
  var mochaBin = require('path').join(__dirname, 'node_modules', '.bin', 'mocha');
  var mocha = require('child_process').spawn(mochaBin, [ '--colors', '--ui', 'exports', '--reporter', 'spec', __filename ]);
  mocha.stdout.pipe(process.stdout);
  mocha.stderr.pipe(process.stderr);
  // Propagate mocha's result; without this the process always exits 0 and
  // `npm test` reports success even when tests fail. 'close' fires after the
  // child's output streams have ended, so no output is cut off.
  mocha.on('close', function(code) {
    // code is null when the child was terminated by a signal
    process.exitCode = (code === null ? 1 : code);
  });
}

0 comments on commit 5b44ef5

Please sign in to comment.