Permalink
Cannot retrieve contributors at this time
Fetching contributors…
| // Copyright © 2016 Alan A. A. Donovan & Brian W. Kernighan. | |
| // License: https://creativecommons.org/licenses/by-nc-sa/4.0/ | |
| // See page 138. | |
| //!+Extract | |
| // Package links provides a link-extraction function. | |
| package links | |
| import ( | |
| "fmt" | |
| "net/http" | |
| "golang.org/x/net/html" | |
| ) | |
| // Extract makes an HTTP GET request to the specified URL, parses | |
| // the response as HTML, and returns the links in the HTML document. | |
| func Extract(url string) ([]string, error) { | |
| resp, err := http.Get(url) | |
| if err != nil { | |
| return nil, err | |
| } | |
| if resp.StatusCode != http.StatusOK { | |
| resp.Body.Close() | |
| return nil, fmt.Errorf("getting %s: %s", url, resp.Status) | |
| } | |
| doc, err := html.Parse(resp.Body) | |
| resp.Body.Close() | |
| if err != nil { | |
| return nil, fmt.Errorf("parsing %s as HTML: %v", url, err) | |
| } | |
| var links []string | |
| visitNode := func(n *html.Node) { | |
| if n.Type == html.ElementNode && n.Data == "a" { | |
| for _, a := range n.Attr { | |
| if a.Key != "href" { | |
| continue | |
| } | |
| link, err := resp.Request.URL.Parse(a.Val) | |
| if err != nil { | |
| continue // ignore bad URLs | |
| } | |
| links = append(links, link.String()) | |
| } | |
| } | |
| } | |
| forEachNode(doc, visitNode, nil) | |
| return links, nil | |
| } | |
| //!-Extract | |
| // Copied from gopl.io/ch5/outline2. | |
| func forEachNode(n *html.Node, pre, post func(n *html.Node)) { | |
| if pre != nil { | |
| pre(n) | |
| } | |
| for c := n.FirstChild; c != nil; c = c.NextSibling { | |
| forEachNode(c, pre, post) | |
| } | |
| if post != nil { | |
| post(n) | |
| } | |
| } |