forked from admpub/nging
-
Notifications
You must be signed in to change notification settings - Fork 0
/
rules.go
123 lines (110 loc) · 3.39 KB
/
rules.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
/*
Nging is a toolbox for webmasters
Copyright (C) 2018-present Wenhui Shen <swh@admpub.com>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package exec
import (
"errors"
"fmt"
"github.com/webx-top/echo"
"github.com/admpub/nging/application/dbschema"
"github.com/admpub/nging/application/library/collector"
"github.com/admpub/nging/application/library/collector/sender"
"github.com/admpub/nging/application/library/notice"
)
// ErrForcedExit is a sentinel error carrying the message `Forced exit`.
// NOTE(review): it is not referenced in this part of the file; presumably it is
// reported when a running collection is aborted via the exit check — confirm
// against the callers.
var ErrForcedExit = errors.New(`Forced exit`)
// Rules is the complete rule set: the main page rule together with the rules
// for the extra (extended) pages.
type Rules struct {
	*Rule //main page rule
	Extra []*Rule //extra (extended) page rules
	// exportFn is installed through SetExportFn and copied onto the main Rule
	// by Collect.
	exportFn func(pageID uint, lastResult *Recv, collected echo.Store, noticeSender sender.Notice) error
	// isExited is installed through SetExitedFn and copied onto the main Rule
	// by Collect.
	isExited func() bool
}
// NewRules returns a Rules value whose main page rule holds a zero-valued
// NgingCollectorPage and an empty (non-nil) rule list, and whose Extra slice is
// empty but non-nil. No export or exit callbacks are set.
func NewRules() *Rules {
	mainRule := &Rule{
		NgingCollectorPage: &dbschema.NgingCollectorPage{},
		RuleList:           []*dbschema.NgingCollectorRule{},
	}
	rules := new(Rules)
	rules.Rule = mainRule
	rules.Extra = []*Rule{}
	return rules
}
// SetExportFn stores exportFn on the rule set and returns the receiver so that
// calls can be chained. Collect copies the stored function onto the main Rule
// before running it.
func (c *Rules) SetExportFn(exportFn func(pageID uint, lastResult *Recv, collected echo.Store, noticeSender sender.Notice) error) *Rules {
	c.exportFn = exportFn
	return c
}
// SetExitedFn stores exitedFn as the exit check of the rule set and returns the
// receiver so that calls can be chained. Collect copies the stored function
// onto the main Rule before running it.
func (c *Rules) SetExitedFn(exitedFn func() bool) *Rules {
	c.isExited = exitedFn
	return c
}
// Collect runs the collection starting from the main (entry) page rule.
//
// The browser engine name is taken from the page's Browser field; an empty
// value or `default` selects `standard`. A browser already registered in
// collector.Services under that name is reused; otherwise the entry from
// collector.Browsers is started with the page's timeout, proxy and wait
// settings and then registered in collector.Services. An engine name found in
// neither place yields an `Unsupported` error.
//
// debug, the export callback and the exit check are copied onto the main rule
// before it runs. A nil noticeSender is replaced by sender.Default.
//
// The results and error are those of the main rule's Collect. A panic raised
// along the way is recovered and reported through err: a panic value that is
// itself an error is returned unchanged (so errors.Is / errors.As keep
// working on it), any other value is formatted with %v.
func (c *Rules) Collect(debug bool, noticeSender sender.Notice, progress *notice.Progress) (rs []Result, err error) {
	defer func() {
		panicErr := recover()
		if panicErr == nil {
			return
		}
		// Preserve the identity of error values instead of flattening them
		// into a fresh string-only error; the message stays the same.
		if e, ok := panicErr.(error); ok {
			err = e
			return
		}
		err = fmt.Errorf(`%v`, panicErr)
	}()

	page := c.Rule.NgingCollectorPage
	timeout := int(page.Timeout)
	engine := page.Browser
	if len(engine) == 0 || engine == `default` {
		engine = `standard`
	}

	var browser collector.Browser
	browserService, ok := collector.Services.Load(engine)
	if ok {
		// A value of the wrong type panics here; the deferred recover above
		// turns that into the returned error.
		browser = browserService.(collector.Browser)
	} else {
		browser, ok = collector.Browsers[engine]
		if !ok {
			return nil, fmt.Errorf(`Unsupported: %s`, engine)
		}
		if err := browser.Start(echo.Store{
			`timeout`: timeout,
			`proxy`:   page.Proxy,
			`delay`:   page.Waits,
		}); err != nil {
			return nil, err
		}
		// Register the started browser so later calls can reuse it.
		collector.Services.Store(engine, browser)
	}
	// NOTE(review): the browser is not closed here and stays registered in
	// collector.Services — confirm which component is responsible for
	// shutting it down.

	// browseData is shared by every fetch call; only `charset` is updated per
	// request.
	browseData := make(echo.Store)
	var fetch Fether = func(pageURL string, charset string) ([]byte, bool, error) {
		browseData.Set(`charset`, charset)
		body, err := browser.Do(pageURL, browseData)
		return body, browser.Transcoded(), err
	}

	c.Rule.debug = debug
	c.Rule.exportFn = c.exportFn
	c.Rule.isExited = c.isExited

	// Sub-page level counter used to walk the elements of c.Extra;
	// -1 denotes the entry page.
	index := -1

	// Entry page.
	c.Rule.result = &Recv{
		index: -1,
		rule:  c.Rule,
		title: ``,
		url:   ``,
	}
	if noticeSender == nil {
		noticeSender = sender.Default
	}
	return c.Rule.Collect(
		uint64(page.ParentId),
		fetch,
		index,
		c.Extra,
		noticeSender,
		progress,
	)
}