// extract.go

package utils
import (
"bytes"
"fmt"
"os"
"path/filepath"
"strings"
"text/template"
"github.com/PuerkitoBio/goquery"
)
var (
// attrs lists the HTML attribute names whose values ReplaceAttr inspects
// and rewrites.
attrs = []string{"href", "src"}
// prefixes lists the value prefixes matched by skipPrefix; ReplaceAttr
// leaves any attribute value starting with one of them untouched.
prefixes = []string{"http://", "https://", "javascript:", "#"}
)
// Blob is the data handed to the blob URL template by ExecuteURL.
type Blob struct {
	// AppID and MD5 are not modified by the code in this file; they are only
	// made available to the template.
	AppID, MD5 string
	// FileName is overwritten by ReplaceAttr with the resolved path of each
	// reference before the template is rendered.
	FileName string
}
// ReplaceAttr rewrites the resource references of an HTML file so that they
// point at blob URLs.
//
// The file at subFilePath is parsed as HTML. For every element carrying one of
// the attributes listed in attrs, the value is left alone when it is empty or
// matched by skipPrefix; otherwise it is resolved with replacePath (relative
// to dst), stored in blob.FileName, and replaced by the result of rendering
// blobTemplate with that blob via ExecuteURL.
//
// It returns the serialized document. An error is returned, unwrapped, when
// the file cannot be opened, the HTML cannot be parsed, or the document cannot
// be serialized.
func ReplaceAttr(blob Blob, blobTemplate, subFilePath, dst string) (*bytes.Buffer, error) {
	file, err := os.Open(subFilePath)
	if err != nil {
		return nil, err
	}
	defer file.Close() // nolint: errcheck, gosec

	dom, err := goquery.NewDocumentFromReader(file)
	if err != nil {
		return nil, err
	}

	for _, attr := range attrs {
		selector := fmt.Sprintf("[%s]", attr)
		// Each invokes the callback synchronously, so capturing attr is safe.
		dom.Find(selector).Each(func(_ int, selection *goquery.Selection) {
			val, exists := selection.Attr(attr)
			if !exists || val == "" || skipPrefix(val) {
				return
			}
			// blob is this call's own copy (passed by value), so FileName can
			// be overwritten for every reference without affecting the caller.
			blob.FileName = replacePath(subFilePath, val, dst)
			selection.SetAttr(attr, ExecuteURL(blob, blobTemplate))
		})
	}

	// Surface serialization failures instead of returning an empty buffer
	// together with a nil error.
	ret, err := dom.Html()
	if err != nil {
		return nil, err
	}
	return bytes.NewBufferString(ret), nil
}
// replacePath resolves the reference val, found in the file subFilePath, to a
// slash-separated path relative to dst.
//
// Resolution starts at cleanDir(subFilePath, dst) and walks the "/"-separated
// segments of val: "." keeps the current directory, ".." drops the last
// component (and leaves an already empty path empty), and anything else is
// appended. A leading "/" in val is not treated specially, so such a reference
// is still resolved relative to the file's directory.
func replacePath(subFilePath, val, dst string) string {
	// ReplaceAttr places the result inside an href/src value, so the path is
	// kept slash-separated even where the OS separator differs. Where the
	// separator already is "/", ToSlash returns its input unchanged.
	dir := filepath.ToSlash(cleanDir(subFilePath, dst))
	for _, seg := range strings.Split(val, "/") {
		switch seg {
		case ".":
			// Stay in the current directory.
		case "..":
			if i := strings.LastIndex(dir, "/"); i >= 0 {
				dir = dir[:i]
			} else {
				dir = ""
			}
		default:
			// Empty segments (from "//" or a leading "/") also land here;
			// Join ignores empty elements.
			dir = filepath.ToSlash(filepath.Join(dir, seg))
		}
	}
	return dir
}
// cleanDir returns the directory of subFilePath with dst removed, i.e. the
// directory of the file relative to dst.
//
// The first occurrence of dst (normalized with filepath.Clean, so a trailing
// separator on dst does not matter) is removed from filepath.Dir(subFilePath),
// and a single leading path separator, if present, is dropped. A file located
// directly in dst, or a bare file name, yields "".
func cleanDir(subFilePath, dst string) string {
	dir := filepath.Dir(subFilePath)
	if dst != "" {
		// filepath.Dir never leaves a trailing separator, so compare against
		// a dst in the same normalized form. Clean("") would be ".", hence
		// the guard.
		dst = filepath.Clean(dst)
	}
	dir = strings.Replace(dir, dst, "", 1)
	// filepath.Dir reports "." for a path without a directory component;
	// that means "no sub-directory" here.
	if dir == "." {
		return ""
	}
	// Strip only an actual separator. Dropping the first byte unconditionally
	// would eat the first character of the directory name whenever the
	// remainder does not start with a separator.
	if dir != "" && os.IsPathSeparator(dir[0]) {
		dir = dir[1:]
	}
	return dir
}
// skipPrefix reports whether val starts with any entry of prefixes.
func skipPrefix(val string) bool {
	for _, p := range prefixes {
		if strings.HasPrefix(val, p) {
			return true
		}
	}
	return false
}
// ExecuteURL renders url as a text/template with blob as its data and returns
// the rendered text.
//
// If url is not a valid template, the empty string is returned. If rendering
// fails part-way, whatever was written before the failure is returned.
func ExecuteURL(blob Blob, url string) string {
	t, err := template.New("").Parse(url)
	if err != nil {
		// Parse returns a nil template on failure; calling Execute on it
		// would panic with a nil dereference.
		return ""
	}
	var buf bytes.Buffer
	// Best effort: the signature has no error return, so an execution error
	// keeps the partial output, as before.
	_ = t.Execute(&buf, blob)
	return buf.String()
}