-
Notifications
You must be signed in to change notification settings - Fork 0
/
content.go
61 lines (53 loc) · 1.51 KB
/
content.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
package bslc
import (
	"bytes"
	"io"
	"io/ioutil"
	"mime"
	"net/http"
	"net/url"
	"path"
	"strings"
)
// Content represents the returned content from a crawled URL. The content's
// origin URL is saved in URL. ContentType holds the MIME type as specified by
// the remote server. Filename is the content's filename or an empty string if
// one cannot be determined. Done is a channel that must be signalled after all
// processing of the content is done.
type Content struct {
// URL is a copy of the URL of the request that produced this content.
URL url.URL
// ContentType is derived from the response's Content-Type header, without
// any parameters that follow a ";".
ContentType string
// Filename comes from the Content-Disposition header when that header
// supplies one, and otherwise from the last segment of the URL path.
Filename string
// Done is the channel handed to newContent by its caller.
Done chan bool
// body is the unread HTTP response body; readBody consumes and closes it.
body io.ReadCloser
// data holds the bytes read from body; it is nil until readBody has run.
data []byte
}
// Reader returns a new io.Reader positioned at the start of the crawled
// content. Every call returns an independent reader over the same underlying
// bytes, so the content can be consumed more than once. If the body has not
// been read yet, the returned reader is empty.
func (c *Content) Reader() io.Reader {
	// bytes.NewReader is a read-only view of c.data. A bytes.Buffer would take
	// ownership of the slice and permit writes into its spare capacity, which
	// is unsafe when several readers share the same data.
	return bytes.NewReader(c.data)
}
// newContent builds a Content describing the HTTP response res. ch is stored
// as the Content's Done channel. The response body is not read here; it is
// only retained so that it can be consumed later.
//
// ContentType is the Content-Type header value with any parameters (for
// example "; charset=utf-8") and surrounding whitespace removed. Filename is
// taken from the Content-Disposition header when that header names a file,
// and otherwise from the last segment of the request URL's path.
func newContent(res *http.Response, ch chan bool) *Content {
	// Default to the last path segment of the request URL; this is empty when
	// the path ends in a slash.
	_, filename := path.Split(res.Request.URL.Path)
	if name := dispositionFilename(res.Header.Get("Content-Disposition")); name != "" {
		filename = name
	}

	return &Content{
		URL:         *res.Request.URL,
		ContentType: contentMediaType(res.Header.Get("Content-Type")),
		Filename:    filename,
		Done:        ch,
		body:        res.Body,
	}
}

// contentMediaType returns the bare media type of a Content-Type header
// value: everything before the first ";", with surrounding whitespace
// trimmed. The case of the value is left as the server sent it.
func contentMediaType(header string) string {
	if i := strings.Index(header, ";"); i >= 0 {
		header = header[:i]
	}
	return strings.TrimSpace(header)
}

// dispositionFilename extracts the file name from a Content-Disposition
// header value. It returns an empty string when the header is empty or does
// not carry a usable file name.
func dispositionFilename(header string) string {
	if header == "" {
		return ""
	}

	var name string
	if _, params, err := mime.ParseMediaType(header); err == nil {
		// The mime package handles quoting, optional whitespace, parameter
		// name case and the extended "filename*=" form.
		name = params["filename"]
	} else {
		// Malformed header: fall back to a lenient scan so that values such
		// as an unquoted name containing spaces are still recognised.
		const key = "filename="
		for _, part := range strings.Split(header, ";") {
			part = strings.TrimSpace(part)
			if len(part) >= len(key) && strings.EqualFold(part[:len(key)], key) {
				name = strings.Trim(part[len(key):], `"`)
				break
			}
		}
	}

	// The header is controlled by the remote server. Keep only the final
	// path element so a value like "../../x" cannot point outside the
	// directory a caller chooses to store the file in.
	if i := strings.LastIndexAny(name, `/\`); i >= 0 {
		name = name[i+1:]
	}
	return name
}
// readBody drains the retained response body into c.data and then closes the
// body. A read error is not reported: the bytes received before the error
// are kept and the error itself is dropped.
func (c *Content) readBody() {
	// Error deliberately discarded; this method has no way to report it.
	payload, _ := ioutil.ReadAll(c.body)
	c.data = payload
	c.body.Close()
}