/
overview.go
151 lines (142 loc) · 4.49 KB
/
overview.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package frontend
import (
"bytes"
"fmt"
"net/url"
"path"
"path/filepath"
"strings"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
"github.com/khulnasoft-lab/godep/internal"
"github.com/khulnasoft-lab/godep/internal/derrors"
"github.com/khulnasoft-lab/godep/internal/source"
)
// isMarkdown reports whether filename has a markdown file extension.
// The comparison is case-insensitive and accepts both ".md" and
// ".markdown", the two extensions mentioned by RFC 7763
// (https://tools.ietf.org/html/rfc7763).
func isMarkdown(filename string) bool {
	switch strings.ToLower(filepath.Ext(filename)) {
	case ".md", ".markdown":
		return true
	}
	return false
}
// translateLink rewrites a link found in a README so that it resolves when
// the README is displayed outside of its repository. It returns the
// rewritten link, or "" when dest cannot be parsed or should not be
// rewritten.
//
// Three kinds of dest are handled:
//
//   - Absolute URLs. Only github.com URLs whose path does not end in ".md"
//     and whose path has "blob" as its third segment are rewritten; "blob"
//     is replaced with "raw", mirroring what GitHub does when it renders
//     a README. For example,
//
//     https://github.com/gobuffalo/buffalo/blob/master/logo.svg
//
//     becomes
//
//     https://github.com/gobuffalo/buffalo/raw/master/logo.svg
//
//     Every other absolute URL yields "".
//
//   - URLs with an empty path (such as a bare "#fragment"). The result is
//     "#readme-" followed by the fragment.
//
//   - Relative paths. These are resolved against the directory of
//     readme.Filepath and handed to info.RawURL when useRaw is true, or to
//     info.FileURL otherwise.
func translateLink(dest string, info *source.Info, useRaw bool, readme *internal.Readme) string {
	u, err := url.Parse(dest)
	if err != nil {
		return ""
	}

	if u.IsAbs() {
		// Only non-markdown GitHub links are candidates for rewriting.
		if u.Host != "github.com" || strings.HasSuffix(u.Path, ".md") {
			return ""
		}
		// For "/owner/repo/blob/..." the split yields
		// ["", owner, repo, "blob", ...], so index 3 is the link kind.
		segments := strings.Split(u.Path, "/")
		if len(segments) < 4 || segments[3] != "blob" {
			return ""
		}
		segments[3] = "raw"
		u.Path = strings.Join(segments, "/")
		return u.String()
	}

	if u.Path == "" {
		// No path component: point the fragment at its "readme-" prefixed form.
		return "#readme-" + u.Fragment
	}

	// Relative paths are interpreted from the directory holding the README.
	readmeDir := path.Dir(readme.Filepath)
	relative := path.Clean(trimmedEscapedPath(u))
	target := path.Join(readmeDir, relative)
	if !useRaw {
		return info.FileURL(target)
	}
	return info.RawURL(target)
}
// trimmedEscapedPath strips leading and trailing whitespace from u.Path and
// returns the escaped form of the result. Note that u is modified in place:
// the trimmed path is stored back into u.Path before escaping.
func trimmedEscapedPath(u *url.URL) string {
	trimmed := strings.TrimSpace(u.Path)
	u.Path = trimmed
	return u.EscapedPath()
}
// translateHTML parses htmlText as an HTML fragment and passes every child
// of each parsed <body> element through walkHTML so that image sources can
// be rewritten for the repository described by info and readme.
//
// If walkHTML reports at least one change, the re-rendered body children
// are returned. Otherwise, or when a parsed top-level node does not have a
// <body> as the second child, htmlText is returned unmodified. A top-level
// node that is not an <html> element, a parse failure, or a render failure
// results in an error, which is annotated through derrors.Wrap.
func translateHTML(htmlText []byte, info *source.Info, readme *internal.Readme) (_ []byte, err error) {
	defer derrors.Wrap(&err, "translateHTML(readme.Filepath=%s)", readme.Filepath)

	roots, err := html.ParseFragment(bytes.NewReader(htmlText), nil)
	if err != nil {
		return nil, err
	}

	var (
		rendered bytes.Buffer
		modified bool
	)
	for _, root := range roots {
		// Each parsed node is expected to look like <html><head></head><body>.
		if root.DataAtom != atom.Html {
			return nil, fmt.Errorf("top-level node is %q, expected 'html'", root.DataAtom)
		}

		// Input without that structure (for instance an HTML comment) is
		// passed through untouched.
		first := root.FirstChild
		if first == nil {
			return htmlText, nil
		}
		body := first.NextSibling
		if body == nil || body.DataAtom != atom.Body {
			return htmlText, nil
		}

		// Rewrite, then render, each child of the body in document order.
		for child := body.FirstChild; child != nil; child = child.NextSibling {
			if walkHTML(child, info, readme) {
				modified = true
			}
			if err := html.Render(&rendered, child); err != nil {
				return nil, err
			}
		}
	}

	if !modified {
		// Nothing was rewritten, so hand back the caller's original bytes.
		return htmlText, nil
	}
	return rendered.Bytes(), nil
}
// walkHTML visits n and all of its descendants. For every <img> element it
// runs each "src" attribute value through translateLink (requesting a raw
// URL) and stores the result when it is non-empty.
// It reports whether any attribute in the subtree was rewritten.
func walkHTML(n *html.Node, info *source.Info, readme *internal.Readme) bool {
	modified := false

	if n.Type == html.ElementNode && n.DataAtom == atom.Img {
		// Build a replacement attribute list rather than editing n.Attr in place.
		var rewritten []html.Attribute
		for i := range n.Attr {
			attr := n.Attr[i]
			if attr.Key != "src" {
				rewritten = append(rewritten, attr)
				continue
			}
			// An empty result means the link should stay as it is.
			if link := translateLink(attr.Val, info, true, readme); link != "" {
				attr.Val = link
				modified = true
			}
			rewritten = append(rewritten, attr)
		}
		n.Attr = rewritten
	}

	// Recurse into every child; each one must be visited even after a change
	// has already been recorded.
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		if walkHTML(child, info, readme) {
			modified = true
		}
	}
	return modified
}