diff --git a/src/components/ScrapeHtml.coffee b/src/components/ScrapeHtml.coffee index b4524f454..900e3d9b0 100644 --- a/src/components/ScrapeHtml.coffee +++ b/src/components/ScrapeHtml.coffee @@ -3,23 +3,36 @@ jsdom = require "jsdom" class ScrapeHtml extends noflo.Component constructor: -> - @html = "" + @html = [] @textSelector = "" - @crapSelectors = [] + @ignoreSelectors = [] @inPorts = in: new noflo.Port() textSelector: new noflo.Port() - crapSelector: new noflo.ArrayPort() + ignoreSelector: new noflo.ArrayPort() @outPorts = out: new noflo.Port() error: new noflo.Port() html = "" + @inPorts.in.on "connect", => + @html = [] + @inPorts.in.on "begingroup", (group) => + @outPorts.out.beginGroup group @inPorts.in.on "data", (data) => html += data + @inPorts.in.on "endgroup", => + @once "scraped", => + @outPorts.out.endGroup() + @html.push html + html = "" + @scrapeHtml() @inPorts.in.on "disconnect", => - @html = html + @once "scraped", => + @outPorts.out.disconnect() + return if @html.length > 0 # we are using groups + @html.push html html = "" @scrapeHtml() @@ -28,21 +41,22 @@ class ScrapeHtml extends noflo.Component @inPorts.textSelector.on "disconnect", => @scrapeHtml() - @inPorts.crapSelector.on "data", (data) => - @crapSelectors.push data + @inPorts.ignoreSelector.on "data", (data) => + @ignoreSelectors.push data scrapeHtml: -> - return unless @html.length - return unless @textSelector.length - target = @outPorts.out - jsdom.env @html, ['http://code.jquery.com/jquery.min.js'], (err, win) => - if err - @outPorts.error.send err - return @outPorts.error.disconnect() - win.$(crap).remove() for crap in @crapSelectors - for text in (win.$(@textSelector).map -> win.$(this).text()) - @outPorts.out.send text - @outPorts.out.disconnect() - @html = "" + return unless @html.length > 0 + return unless @textSelector.length > 0 + for h in @html + jsdom.env h, ['http://code.jquery.com/jquery.min.js'], (err, win) => + if err + @outPorts.error.send err + return @outPorts.error.disconnect() + win.$(ignore).remove() for ignore in @ignoreSelectors + win.$(@textSelector).map (i,e) => + @outPorts.out.beginGroup e.id if e.hasAttribute "id" + @outPorts.out.send win.$(e).text() + @outPorts.out.endGroup() if e.hasAttribute "id" + @emit "scraped" exports.getComponent = -> new ScrapeHtml diff --git a/test/ScrapeHtml.coffee b/test/ScrapeHtml.coffee new file mode 100644 index 000000000..ec41cda75 --- /dev/null +++ b/test/ScrapeHtml.coffee @@ -0,0 +1,84 @@ +scrape = require "../src/components/ScrapeHtml" +socket = require "../src/lib/InternalSocket" + +setupComponent = -> + c = scrape.getComponent() + ins = socket.createSocket() + out = socket.createSocket() + c.inPorts.in.attach ins + c.outPorts.out.attach out + return [c, ins, out] + +exports["test selector then html"] = (test) -> + [c, ins, out] = setupComponent() + s = socket.createSocket() + c.inPorts.textSelector.attach s + expect = ["bar","baz"] + out.once "begingroup", (group) -> + test.fail "should not get groups without element ids" + out.on "data", (data) -> + test.equal data, expect.shift() + test.done() if expect.length == 0 + s.send "p.test" + s.disconnect() + ins.send '

foo

ba' + ins.send 'r

baz

' + ins.disconnect() + +exports["test html then selector"] = (test) -> + [c, ins, out] = setupComponent() + s = socket.createSocket() + c.inPorts.textSelector.attach s + expect = ["bar","baz"] + out.on "data", (data) -> + test.equal data, expect.shift() + test.done() if expect.length == 0 + ins.send '

foo

ba' + ins.send 'r

baz

' + ins.disconnect() + s.send "p.test" + s.disconnect() + +exports["test ignore"] = (test) -> + [c, ins, out] = setupComponent() + s = socket.createSocket() + i = socket.createSocket() + c.inPorts.textSelector.attach s + c.inPorts.ignoreSelector.attach i + expect = ["foo"] + out.on "data", (data) -> + test.equal data, expect.shift() + test.done() if expect.length == 0 + i.send ".noise" + i.send "#crap" + i.disconnect() + ins.send '

foo

ba' + ins.send 'r

baz

' + ins.disconnect() + s.send "p.test" + s.disconnect() + +exports["test group by element id"] = (test) -> + [c, ins, out] = setupComponent() + s = socket.createSocket() + c.inPorts.textSelector.attach s + expectevent = "begingroup" + expectgroup = ["a","b"] + out.on "begingroup", (group) -> + test.equal "begingroup", expectevent + test.equal group, expectgroup.shift() + expectevent = "data" + expectdata = ["bar","baz"] + out.on "data", (data) -> + test.equal "data", expectevent + test.equal data, expectdata.shift() + expectevent = "endgroup" + out.on "endgroup", -> + test.equal "endgroup", expectevent + expectevent = "begingroup" + test.done() if expectgroup.length == 0 + s.send "p.test" + s.disconnect() + ins.send '

foo

ba' + ins.send 'r

baz

' + ins.disconnect()