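// index.spec.ts: integration tests for Crawler. The specs drive a local
// mock-http-server instance on localhost:9000 so the crawler can be
// exercised without real network access.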
import MockServer from 'mock-http-server'
import Crawler from '../src'
const server = new MockServer({ host: 'localhost', port: 9000 }, null)
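// Registers a GET route `/<id>` on the mock server that responds with a
// small HTML fragment, and returns that route's absolute URL.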
const createPage = (id: string) => {
  server.on({
    method: 'GET',
    path: `/${id}`,
    reply: {
      status: 200,
      headers: { 'content-type': 'text/html' },
      body: `<div id="node">${id}</div>`
    }
  })
  return `http://localhost:9000/${id}`
}

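// Start the mock server before each spec and stop it afterwards, so every
// test runs against a clean server.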
beforeEach(done => {
  server.start(done)
})

afterEach(done => {
  server.stop(done)
})

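// Constructor smoke test: `new Crawler(options)` yields a Crawler instance.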
describe('Crawler', () => {
  it('new', async () => {
    const crawler = new Crawler({
      pageEvaluate: () => {
        console.log(location.href)
      }
    })
    expect(crawler).toBeInstanceOf(Crawler)
  })
})

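// End-to-end round trip: queue two mock pages, crawl them with a single
// parallel worker, and verify each extracted result matches its source URL.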
describe('launch and close', () => {
  it('launch and close', async () => {
    const pageNames = ['test', 'demo']
    const urls = pageNames.map(createPage)
    const crawler = new Crawler({
      parallel: 1,
      pageEvaluate: () => {
        return document.querySelector('#node')!.innerHTML
      }
    })
    crawler.queue(urls)
    await crawler.launch()
    const result = await crawler.start()
    await crawler.close()
    // Each page's extracted innerHTML is its id, which appears in its URL.
    result.forEach(({ url, result: html }) => {
      expect(url.includes(html)).toBeTruthy()
    })
  })
})

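// Queue validation: absolute URLs are accepted, a bare host without a
// protocol is rejected.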
describe('queue', () => {
  it('queue url', () => {
    const crawler = new Crawler({
      pageEvaluate: () => {
        return location.href
      }
    })
    crawler.queue('https://baidu.com')
    expect(crawler.urls.length).toBe(1)
    expect(crawler.urls[0]).toBe('https://baidu.com')
  })

  it('queue invalid url', () => {
    const crawler = new Crawler({
      pageEvaluate: () => {
        console.log(location.href)
      }
    })
    crawler.queue('baidu.com')
    expect(crawler.urls.length).toBe(0)
  })
})

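// The `next` callback runs after each crawled page and may enqueue more
// work: here it adds a third page exactly once, so three results return.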
describe('next', () => {
  it('next', async () => {
    const pageNames = ['test', 'demo']
    const urls = pageNames.map(createPage)
    const crawler = new Crawler({
      parallel: 2,
      next: (result, page) => {
        // Enqueue one extra page the first time any page finishes.
        if (!pageNames.includes('xxx')) {
          pageNames.push('xxx')
          crawler.queue(createPage('xxx'))
        }
      },
      pageEvaluate: () => {
        return document.querySelector('#node')!.innerHTML
      }
    })
    crawler.queue(urls)
    await crawler.launch()
    const result = await crawler.start()
    await crawler.close()
    expect(result.length).toBe(3)
  })
})

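/*
 * A minimal usage sketch inferred from the specs above. Only the options and
 * methods these tests exercise (parallel, pageEvaluate, next, queue, launch,
 * start, close, urls) are assumed; nothing else about the Crawler API is.
 *
 *   const crawler = new Crawler({
 *     parallel: 2,
 *     pageEvaluate: () => document.title
 *   })
 *   crawler.queue(['https://example.com'])
 *   await crawler.launch()
 *   const results = await crawler.start()
 *   await crawler.close()
 */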