Skip to content

Commit 6f73cac

Browse files
committed
Extract stream helpers and WalkHTML methods
1 parent dd1c2fb commit 6f73cac

File tree

2 files changed

+132
-125
lines changed

2 files changed

+132
-125
lines changed

index.js

Lines changed: 74 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,13 @@ var isUrl = require('is-url')
1111
var isArray = Array.isArray
1212
var fs = require('fs')
1313

14-
function handleStreamError (stream, fn) {
15-
fn(function (err) {
16-
if (err) stream.emit('error', err)
17-
})
18-
}
19-
2014
/**
2115
* Locals
2216
*/
2317

2418
var absolutes = require('./lib/absolutes')
2519
var resolve = require('./lib/resolve')
20+
var streamHelper = require('./lib/stream')
2621
var params = require('./lib/params')
2722
var walk = require('./lib/walk')
2823

@@ -69,6 +64,9 @@ function Xray () {
6964
var pages = []
7065
var stream
7166

67+
var walkHTML = WalkHTML(xray, selector, scope)
68+
var request = Request(crawler)
69+
7270
function node (source2, fn) {
7371
if (arguments.length === 1) {
7472
fn = source2
@@ -84,10 +82,10 @@ function Xray () {
8482

8583
if (isUrl(source)) {
8684
debug('starting at: %s', source)
87-
xray.request(source, function (err, html) {
85+
request(source, function (err, html) {
8886
if (err) return next(err)
8987
var $ = load(html, source)
90-
node.html($, next)
88+
walkHTML($, next)
9189
})
9290
} else if (scope && ~scope.indexOf('@')) {
9391
debug('resolving to a url: %s', scope)
@@ -96,21 +94,21 @@ function Xray () {
9694
// ensure that a@href is a URL
9795
if (!isUrl(url)) {
9896
debug('%s is not a url. Skipping!', url)
99-
return node.html(load(''), next)
97+
return walkHTML(load(''), next)
10098
}
10199

102100
debug('resolved "%s" to a %s', scope, url)
103-
xray.request(url, function (err, html) {
101+
request(url, function (err, html) {
104102
if (err) return next(err)
105103
var $ = load(html, url)
106-
node.html($, next)
104+
walkHTML($, next)
107105
})
108106
} else if (source) {
109107
var $ = load(source)
110-
node.html($, next)
108+
walkHTML($, next)
111109
} else {
112110
debug('%s is not a url or html. Skipping!', source)
113-
return node.html(load(''), next)
111+
return walkHTML(load(''), next)
114112
}
115113

116114
function next (err, obj, $) {
@@ -120,8 +118,8 @@ function Xray () {
120118

121119
// create the stream
122120
if (!stream) {
123-
if (paginate) stream = stream_array(state.stream)
124-
else stream = stream_object(state.stream)
121+
if (paginate) stream = streamHelper.array(state.stream)
122+
else stream = streamHelper.object(state.stream)
125123
}
126124

127125
if (paginate) {
@@ -152,10 +150,10 @@ function Xray () {
152150
debug('paginating %j', url)
153151
isFinite(limit) && debug('%s page(s) left to crawl', limit)
154152

155-
xray.request(url, function (err, html) {
153+
request(url, function (err, html) {
156154
if (err) return next(err)
157155
var $ = load(html, url)
158-
node.html($, next)
156+
walkHTML($, next)
159157
})
160158
} else {
161159
stream(obj, true)
@@ -166,54 +164,6 @@ function Xray () {
166164
return node
167165
}
168166

169-
function load (html, url) {
170-
html = html || ''
171-
var $ = html.html ? html : cheerio.load(html)
172-
if (url) $ = absolutes(url, $)
173-
return $
174-
}
175-
176-
node.html = function ($, fn) {
177-
walk(selector, function (v, k, next) {
178-
if (typeof v === 'string') {
179-
var value = resolve($, root(scope), v)
180-
return next(null, value)
181-
} else if (typeof v === 'function') {
182-
return v($, function (err, obj) {
183-
if (err) return next(err)
184-
return next(null, obj)
185-
})
186-
} else if (isArray(v)) {
187-
if (typeof v[0] === 'string') {
188-
return next(null, resolve($, root(scope), v))
189-
} else if (typeof v[0] === 'object') {
190-
var $scope = $.find ? $.find(scope) : $(scope)
191-
var pending = $scope.length
192-
var out = []
193-
194-
// Handle the empty result set (thanks @jenbennings!)
195-
if (!pending) return next(null, out)
196-
197-
$scope.each(function (i, el) {
198-
var $innerscope = $scope.eq(i)
199-
var node = xray(scope, v[0])
200-
node($innerscope, function (err, obj) {
201-
if (err) return next(err)
202-
out[i] = obj
203-
if (!--pending) {
204-
return next(null, compact(out))
205-
}
206-
})
207-
})
208-
}
209-
}
210-
return next()
211-
}, function (err, obj) {
212-
if (err) return fn(err)
213-
fn(null, obj, $)
214-
})
215-
}
216-
217167
node.paginate = function (paginate) {
218168
if (!arguments.length) return state.paginate
219169
state.paginate = paginate
@@ -229,29 +179,20 @@ function Xray () {
229179
node.stream = function () {
230180
state.stream = store.createWriteStream()
231181
var rs = store.createReadStream()
232-
handleStreamError(rs, node)
182+
streamHelper.waitCb(rs, node)
233183
return rs
234184
}
235185

236186
node.write = function (path) {
237187
if (!arguments.length) return node.stream()
238188
state.stream = fs.createWriteStream(path)
239-
handleStreamError(state.stream, node)
189+
streamHelper.waitCb(state.stream, node)
240190
return state.stream
241191
}
242192

243193
return node
244194
}
245195

246-
xray.request = function (url, fn) {
247-
debug('fetching %s', url)
248-
crawler(url, function (err, ctx) {
249-
if (err) return fn(err)
250-
debug('got response for %s with status code: %s', url, ctx.status)
251-
return fn(null, ctx.body)
252-
})
253-
}
254-
255196
methods.forEach(function (method) {
256197
xray[method] = function () {
257198
if (!arguments.length) return crawler[method]()
@@ -263,13 +204,29 @@ function Xray () {
263204
return xray
264205
}
265206

207+
/**
 * Build a fetch function bound to the given crawler.
 *
 * The returned `request(url, fn)` asks the crawler for `url` and reports
 * either the crawler's error or the `body` of the response context.
 *
 * @param {Function} crawler called as `crawler(url, callback)`; the callback
 *   receives `(err, ctx)`
 * @return {Function} `request(url, fn)` where `fn` receives `(err, body)`
 */
function Request (crawler) {
  function request (url, fn) {
    debug('fetching %s', url)

    // Translate the crawler's (err, ctx) result into (err, body) for `fn`.
    function onResponse (err, ctx) {
      if (err) return fn(err)
      debug('got response for %s with status code: %s', url, ctx.status)
      return fn(null, ctx.body)
    }

    crawler(url, onResponse)
  }

  return request
}
217+
218+
/**
 * Turn `html` into a loaded document.
 *
 * A value that already exposes an `html` property is treated as an
 * already-loaded document and reused as-is; anything else is handed to
 * `cheerio.load`. When `url` is given, the document is passed through
 * `absolutes(url, $)` (presumably to rewrite relative links against
 * `url` -- confirm in ./lib/absolutes).
 *
 * @param {String|Object} html markup or an already-loaded document; a
 *   missing value (`undefined`, `null`) is treated as an empty document
 * @param {String} url (optional)
 * @return {Object} the loaded document
 */
function load (html, url) {
  // A response without a body yields undefined here; without this guard
  // the `.html` lookup below throws a TypeError.
  html = html || ''
  var $ = html.html ? html : cheerio.load(html)
  if (url) $ = absolutes(url, $)
  return $
}
223+
266224
/**
267225
* Get the root, if there is one.
268226
*
269227
* @param {Mixed}
270228
* @return {Boolean|String}
271229
*/
272-
273230
function root (selector) {
274231
return (typeof selector === 'string' || isArray(selector)) &&
275232
!~selector.indexOf('@') &&
@@ -294,53 +251,45 @@ function compact (arr) {
294251
})
295252
}
296253

297-
/**
298-
* Streaming array helper
299-
*
300-
* @param {Stream} data (optional)
301-
*/
302-
303-
function stream_array (stream) {
304-
if (!stream) return function () {}
305-
var first = true
306-
307-
return function _stream_array (data, end) {
308-
var json = JSON.stringify(data, true, 2)
309-
310-
if (first) {
311-
stream.write('[\n')
312-
first = false
313-
}
314-
315-
if (isArray(data)) {
316-
json = json.slice(1, -1)
317-
}
318-
319-
if (end) {
320-
stream.end(json + ']')
321-
} else {
322-
stream.write(json + ',')
323-
}
324-
}
325-
}
326-
327-
/**
328-
* Streaming object helper
329-
*
330-
* @param {Stream} data (optional)
331-
* @return {Function}
332-
*/
333-
334-
function stream_object (stream) {
335-
if (!stream) return function () {}
336-
337-
return function _stream_object (data, end) {
338-
var json = JSON.stringify(data, true, 2)
339-
340-
if (end) {
341-
stream.end(json)
342-
} else {
343-
stream.write(json)
344-
}
254+
/**
 * Build a function that walks `selector` against a loaded document.
 *
 * Each selector value is handled according to its type:
 *   - string: resolved against the document within `root(scope)`
 *   - function: called as `v($, callback)` and its result used as-is
 *   - array of strings: resolved like a string
 *   - array whose first item is an object: a child `xray(scope, v[0])`
 *     is run for every element matched by `scope` and the compacted
 *     results are collected in match order
 * Any other value yields `next()` with no result.
 *
 * @param {Function} xray factory used for nested collections
 * @param {Mixed} selector structure handed to `walk`
 * @param {String} scope
 * @return {Function} `_walkHTML($, fn)`; `fn` receives `(err, obj, $)`
 */
function WalkHTML (xray, selector, scope) {
  return function _walkHTML ($, fn) {
    walk(selector, function (v, k, next) {
      if (typeof v === 'string') {
        var value = resolve($, root(scope), v)
        return next(null, value)
      } else if (typeof v === 'function') {
        return v($, function (err, obj) {
          if (err) return next(err)
          return next(null, obj)
        })
      } else if (isArray(v)) {
        if (typeof v[0] === 'string') {
          return next(null, resolve($, root(scope), v))
        } else if (typeof v[0] === 'object') {
          var $scope = $.find ? $.find(scope) : $(scope)
          var pending = $scope.length
          var out = []
          var settled = false

          // Handle the empty result set (thanks @jenbennings!)
          if (!pending) return next(null, out)

          $scope.each(function (i, el) {
            var $innerscope = $scope.eq(i)
            var node = xray(scope, v[0])
            node($innerscope, function (err, obj) {
              // report exactly once: the first error, or the full result set
              if (settled) return
              if (err) {
                settled = true
                return next(err)
              }
              out[i] = obj
              if (!--pending) {
                settled = true
                return next(null, compact(out))
              }
            })
          })

          // The child walks above own the call to `next`. Falling through
          // to the trailing `next()` would report an empty value before
          // they have finished and call `next` a second time.
          return
        }
      }
      return next()
    }, function (err, obj) {
      if (err) return fn(err)
      fn(null, obj, $)
    })
  }
}

lib/stream.js

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
var isArray = Array.isArray
2+
3+
module.exports = {
4+
/**
5+
* Streaming array helper
6+
*
7+
* @param {Stream} data (optional)
8+
*/
9+
array: function stream_array (stream) {
10+
if (!stream) return function () {}
11+
var first = true
12+
13+
return function _stream_array (data, end) {
14+
var json = JSON.stringify(data, true, 2)
15+
16+
if (first) {
17+
stream.write('[\n')
18+
first = false
19+
}
20+
21+
if (isArray(data)) {
22+
json = json.slice(1, -1)
23+
}
24+
25+
if (end) {
26+
stream.end(json + ']')
27+
} else {
28+
stream.write(json + ',')
29+
}
30+
}
31+
},
32+
33+
/**
34+
* Streaming object helper
35+
*
36+
* @param {Stream} data (optional)
37+
* @return {Function}
38+
*/
39+
object: function stream_object (stream) {
40+
if (!stream) return function () {}
41+
42+
return function _stream_object (data, end) {
43+
var json = JSON.stringify(data, true, 2)
44+
45+
if (end) {
46+
stream.end(json)
47+
} else {
48+
stream.write(json)
49+
}
50+
}
51+
},
52+
53+
waitCb: function stream_callback (stream, fn) {
54+
fn(function (err) {
55+
if (err) stream.emit('error', err)
56+
})
57+
}
58+
}

0 commit comments

Comments
 (0)