Skip to content
This repository

Forgiving HTML/XML/RSS Parser in JS for *both* Node and Browsers

Fetching latest commit…

Octocat-spinner-32-eaf2f5

Cannot retrieve the latest commit at this time

Octocat-spinner-32 tests
Octocat-spinner-32 LICENSE
Octocat-spinner-32 README
Octocat-spinner-32 node-htmlparser.js
Octocat-spinner-32 runtests.js
README
A forgiving HTML parser written in JS for Node.
Reads in imperfect HTML and spits out a simple object model.

Usage:
	var sys = require("sys");
	var htmlparser = require("node-htmlparser");
	var rawHtml = "Xyz <script language= javascript>var foo = '<<bar>>';< /  script><!--<!-- Waah! -- -->";
	var handler = new htmlparser.DefaultHandler();
	var parser = new htmlparser.Parser(handler);
	parser.ParseComplete(rawHtml);
	sys.puts(sys.inspect(handler.dom, false, null));
	
	Outputs...
		[ { raw: 'Xyz ', data: 'Xyz ', type: 'text' }
		, { raw: 'script language= javascript'
		  , data: 'script language= javascript'
		  , type: 'script'
		  , name: 'script'
		  , attribs: { language: 'javascript' }
		  , children: 
		     [ { raw: 'var foo = \'<<bar>>\';<'
		       , data: 'var foo = \'<<bar>>\';<'
		       , type: 'text'
		       }
		     ]
		  }
		, { raw: '<!-- Waah! -- '
		  , data: '<!-- Waah! -- '
		  , type: 'comment'
		  }
		]
Something went wrong with that request. Please try again.