-
Notifications
You must be signed in to change notification settings - Fork 0
/
web.go
157 lines (142 loc) · 3.9 KB
/
web.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
package web
import (
"errors"
"fmt"
"github.com/sirupsen/logrus"
"github.com/sp0x/surf/browser"
"github.com/sp0x/torrentd/indexer/cache"
"github.com/sp0x/torrentd/indexer/source"
"net/url"
"regexp"
"strings"
)
// searchMethodPost selects a form POST when set as a search target's method.
const searchMethodPost = "post"

// searchMethodGet selects a plain GET when set as a search target's method.
// An empty method is treated the same way by Fetch.
const searchMethodGet = "get"
// ContentFetcher fetches web content through a browser and reports failed
// requests to a connectivity tester.
type ContentFetcher struct {
// Browser performs the actual requests (Open / PostForm) and exposes the
// resulting status code, response headers and history.
Browser browser.Browsable
// Cacher is optional. When non-nil it is handed a new browser tab after every
// successful request so the page can be cached.
Cacher ContentCacher
// ConnectivityTester is required. Its Invalidate method is called with the
// URL of every request that fails.
ConnectivityTester cache.ConnectivityTester
// options holds the settings passed to NewWebContentFetcher.
options FetchOptions
}
// FetchOptions carries the optional settings of a ContentFetcher.
type FetchOptions struct {
// DumpData is not read in this part of the file.
// NOTE(review): presumably consulted by dumpFetchData to decide whether the
// fetched data is dumped - confirm against its definition.
DumpData bool
}
// NewWebContentFetcher builds a ContentFetcher around the given browser.
//
// contentCache may be nil, in which case fetched pages are not cached.
// connectivityTester is mandatory: the function panics when it is nil.
// options is stored as-is on the returned fetcher.
func NewWebContentFetcher(browser browser.Browsable, contentCache ContentCacher, connectivityTester cache.ConnectivityTester, options FetchOptions) source.ContentFetcher {
	if connectivityTester == nil {
		panic("a connectivity tester is required")
	}

	fetcher := new(ContentFetcher)
	fetcher.Browser = browser
	// The supplied cache (typically the indexer) stores the fetched pages.
	fetcher.Cacher = contentCache
	fetcher.ConnectivityTester = connectivityTester
	fetcher.options = options
	return fetcher
}
// ContentCacher stores fetched pages.
type ContentCacher interface {
// CachePage caches the page held by browsable. In this file it is called with
// a new browser tab after each successful GET or POST.
CachePage(browsable browser.Browsable) error
}
// Cleanup clears the history jar of the underlying browser.
func (w *ContentFetcher) Cleanup() {
	history := w.Browser.HistoryJar()
	history.Clear()
}
// FetchUrl opens url with a GET request. When the request fails the URL is
// invalidated in the connectivity tester and the error is returned.
func (w *ContentFetcher) FetchUrl(url string) error {
	if err := w.get(url); err != nil {
		w.ConnectivityTester.Invalidate(url)
		return err
	}
	return nil
}
// Fetch retrieves the content from which the search results will be extracted.
//
// The request is selected by target.Method:
//   - "" or "get": target.Values, if any, are appended to target.Url as a
//     query string and the resulting URL is opened. target.Url is updated in
//     place, so the caller sees the final URL.
//   - "post": target.Values are submitted as a form to target.Url.
//   - anything else is rejected with an error.
//
// When the request fails, the URL is invalidated in the connectivity tester
// and the error is returned. The browser history is cleared before returning,
// whatever the outcome.
func (w *ContentFetcher) Fetch(target *source.SearchTarget) error {
	if target == nil {
		return errors.New("target is required for searching")
	}
	// After we're done we'll cleanup the history of the browser.
	defer w.Cleanup()

	switch target.Method {
	case "", searchMethodGet:
		if len(target.Values) > 0 {
			// Pick the separator from the URL itself: a URL that already
			// carries a query must be extended with "&", not a second "?".
			sep := "?"
			switch {
			case strings.HasSuffix(target.Url, "?"), strings.HasSuffix(target.Url, "&"):
				sep = ""
			case strings.Contains(target.Url, "?"):
				sep = "&"
			}
			target.Url = target.Url + sep + target.Values.Encode()
		}
		if err := w.get(target.Url); err != nil {
			w.ConnectivityTester.Invalidate(target.Url)
			return err
		}
	case searchMethodPost:
		if err := w.Post(target.Url, target.Values, true); err != nil {
			w.ConnectivityTester.Invalidate(target.Url)
			return err
		}
	default:
		return fmt.Errorf("unknown search method %q", target.Method)
	}
	w.dumpFetchData()
	return nil
}
// get opens targetUrl in the browser and then follows a Refresh response
// header when one is present.
//
// After a successful open the page is offered to the Cacher (when one is
// configured) on a new browser tab. Caching is best-effort: a failure is
// logged and does not fail the fetch. An error is returned when the page
// cannot be opened or when following the Refresh header fails; in the latter
// case targetUrl is also invalidated in the connectivity tester.
func (w *ContentFetcher) get(targetUrl string) error {
	logrus.WithField("target", targetUrl).
		Debug("Opening page")
	if err := w.Browser.Open(targetUrl); err != nil {
		return err
	}
	if w.Cacher != nil {
		if err := w.Cacher.CachePage(w.Browser.NewTab()); err != nil {
			// Leave a trace instead of silently dropping the error.
			logrus.WithError(err).
				WithField("target", targetUrl).
				Warn("Could not cache page")
		}
	}

	logrus.
		WithFields(logrus.Fields{"code": w.Browser.StatusCode(), "page": w.Browser.Url()}).
		Debugf("Finished request")
	if err := w.handleMetaRefreshHeader(); err != nil {
		w.ConnectivityTester.Invalidate(targetUrl)
		return err
	}
	return nil
}
// Post submits data as a form to url and then follows a Refresh response
// header when one is present.
//
// When log is true the outgoing request (URL and encoded values) is logged at
// debug level. After a successful post the page is offered to the Cacher (when
// one is configured) on a new browser tab. Caching is best-effort: a failure
// is logged and does not fail the request. An error is returned when the form
// cannot be posted or when following the Refresh header fails; in the latter
// case url is also invalidated in the connectivity tester.
func (w *ContentFetcher) Post(url string, data url.Values, log bool) error {
	if log {
		logrus.
			WithFields(logrus.Fields{"url": url, "vals": data.Encode()}).
			Debugf("Posting to page")
	}
	if err := w.Browser.PostForm(url, data); err != nil {
		return err
	}
	if w.Cacher != nil {
		if err := w.Cacher.CachePage(w.Browser.NewTab()); err != nil {
			// Leave a trace instead of silently dropping the error.
			logrus.WithError(err).
				WithField("url", url).
				Warn("Could not cache page")
		}
	}

	logrus.
		WithFields(logrus.Fields{"code": w.Browser.StatusCode(), "page": w.Browser.Url()}).
		Debugf("Finished request")
	if err := w.handleMetaRefreshHeader(); err != nil {
		w.ConnectivityTester.Invalidate(url)
		return err
	}
	return nil
}
// metaRefreshSeparator splits a Refresh header value into its delay and
// target parts. It is compiled once here rather than on every response.
var metaRefreshSeparator = regexp.MustCompile(`\s*;\s*`)

// handleMetaRefreshHeader follows a response header such as
//
//	Refresh: 0;url=my_view_page.php
//
// by opening the referenced page. The target is resolved as a URL reference
// against the URL of the current request, so relative paths, targets carrying
// their own query string and absolute URLs are all handled, and the browser's
// own request URL is left unmodified. A header without a target (delay only,
// or an empty target) is ignored.
//
// When the follow-up request fails, its URL is invalidated in the
// connectivity tester and the error is returned.
//
// NOTE(review): the follow-up goes through get, which calls this method
// again; a server that keeps answering with a Refresh header is followed
// without a hop limit.
//
// This should eventually be upstreamed into the surf browser.
func (w *ContentFetcher) handleMetaRefreshHeader() error {
	refresh := w.Browser.ResponseHeaders().Get("Refresh")
	if refresh == "" {
		return nil
	}
	parts := metaRefreshSeparator.Split(refresh, 2)
	if len(parts) != 2 {
		// Only a delay was given: there is nothing to follow.
		return nil
	}
	logrus.
		WithField("fields", parts).
		Debug("Found refresh header")

	// Strip the "url=" prefix regardless of case, plus optional quotes.
	ref := parts[1]
	if len(ref) >= 4 && strings.EqualFold(ref[:4], "url=") {
		ref = ref[4:]
	}
	ref = strings.Trim(strings.TrimSpace(ref), `"'`)
	if ref == "" {
		return nil
	}

	// Parse resolves ref relative to the request URL and returns a new URL
	// value instead of modifying the one held by the browser state.
	next, err := w.Browser.State().Request.URL.Parse(ref)
	if err != nil {
		return err
	}
	nextUrl := next.String()
	if err := w.get(nextUrl); err != nil {
		w.ConnectivityTester.Invalidate(nextUrl)
		return err
	}
	return nil
}