Skip to content
This repository
  • 3 commits
  • 1 file changed
  • 0 comments
  • 1 contributor
Jan 17, 2010
Robert Righter Initial commit of htmldocument, not quite working yet 70187e9
Robert Righter bug fixes b393dd2
Robert Righter formatting e7aa4df

Showing 1 changed file with 99 additions and 2 deletions. Show diff stats Hide diff stats

  1. +99 2 lib/node-xml.js
101 lib/node-xml.js
@@ -1165,7 +1165,101 @@ Stack.prototype.push = function(o) {
1165 1165 this.m_arr[this.m_arr.length] = o;
1166 1166 }
1167 1167
1168   -// CONVENIENCE FUNCTIONS
  1168 +
  1169 +//////////////////////////////////////////////////////HIGHER LEVEL PARSER//////////////////////////////////////////////
  1170 +sys = require('sys');
  1171 +
  1172 +var parseHtmlFile = function(filename, callback){
  1173 + var posix = require('posix');
  1174 + //var that = this;
  1175 + posix.cat(filename).addCallback(function (content) {
  1176 + parseHtmlString(content,callback);
  1177 + });
  1178 +}
  1179 +
  1180 +var parseHtmlString = function(thehtml, callback){
  1181 +
  1182 + var m_doc = new HtmlDocument();
  1183 + //make a sax parser and hook up the events
  1184 + var m_parser = new SaxParser(function(cb) {
  1185 + cb.onStartDocument(function() {
  1186 + });
  1187 + cb.onEndDocument(function() {
  1188 + callback(m_doc);
  1189 + });
  1190 + cb.onStartElementNS(function(elem, attrs, prefix, uri, namespaces) {
  1191 + m_doc._pushElement(elem, attrs, prefix, uri, namespaces);
  1192 + });
  1193 + cb.onEndElementNS(function(elem, prefix, uri) {
  1194 + m_doc._popElement(elem, prefix, uri);
  1195 + });
  1196 + cb.onCharacters(function(chars) {
  1197 + m_doc._pushCharactersOnCurrentElement(chars);
  1198 + });
  1199 + cb.onCdata(function(cdata) {
  1200 +
  1201 + });
  1202 + cb.onComment(function(msg) {
  1203 +
  1204 + });
  1205 + cb.onWarning(function(msg) {
  1206 +
  1207 + });
  1208 + cb.onError(function(msg) {
  1209 + sys.puts('<ERROR>'+JSON.stringify(msg)+"</ERROR>");
  1210 + //(JSON.stringify(msg));
  1211 + });
  1212 + });
  1213 + m_parser.parseString(thehtml);
  1214 +}
  1215 +
  1216 +var HtmlDocument = function (){
  1217 + this.m_list = new Array();
  1218 + this.m_elementstack = new Array();
  1219 +}
  1220 +
  1221 +HtmlDocument.prototype._pushElement = function(elem, attrs, prefix, uri, namespaces) {
  1222 + this.m_elementstack.push(elem);
  1223 + this.m_list.push( {
  1224 + elempath : '/' + this.m_elementstack.join('/'),
  1225 + attributes : attrs,
  1226 + prefix : prefix,
  1227 + uri : uri,
  1228 + });
  1229 +}
  1230 +
  1231 +HtmlDocument.prototype._popElement = function(elem, prefix, uri){
  1232 + this.m_elementstack.pop();
  1233 +}
  1234 +
  1235 +HtmlDocument.prototype._pushCharactersOnCurrentElement = function (characters){
  1236 + this.m_elementstack.push('_CHARS');
  1237 + this.m_list.push({
  1238 + elempath : '/' + this.m_elementstack.join('/'),
  1239 + attributes : [],
  1240 + prefix : '',
  1241 + uri : '',
  1242 + chars : characters,
  1243 + });
  1244 + this.m_elementstack.pop();
  1245 +}
  1246 +
  1247 +
  1248 +HtmlDocument.prototype.asString = function (){
  1249 + var toreturn = '';
  1250 + this.m_list.forEach(function (item){
  1251 + toreturn+= item.elempath + "\n";
  1252 + });
  1253 + return toreturn;
  1254 +}
  1255 +
  1256 +HtmlDocument.prototype.find = function (xpath){
  1257 + //TODO: implement xpath querys
  1258 +}
  1259 +
  1260 +
  1261 +
  1262 +///////////////////////////////////////////////////// CONVENIENCE FUNCTIONS /////////////////////////////////////////////
/**
 * Reports whether `str` is null, undefined, or has a length of zero.
 *
 * @param str  Value to test; anything with a `length` property works.
 * @returns {boolean} true when `str` is null/undefined or its length is 0.
 */
function isEmpty(str) {
    // `== null` deliberately matches both null and undefined.
    return (str == null) || (str.length === 0);
}
@@ -1243,7 +1337,10 @@ function __unescapeString(str) {
1243 1337 }
1244 1338
1245 1339 process.mixin(exports, {
1246   - SaxParser: SaxParser
  1340 + SaxParser: SaxParser,
  1341 + parseHtmlFile: parseHtmlFile,
  1342 + parseHtmlString : parseHtmlString,
  1343 + HtmlDocument : HtmlDocument
1247 1344 });
1248 1345
1249 1346 })()

No commit comments for this range

Something went wrong with that request. Please try again.