Permalink
Cannot retrieve contributors at this time
Fetching contributors…
| // Copyright © 2016 Alan A. A. Donovan & Brian W. Kernighan. | |
| // License: https://creativecommons.org/licenses/by-nc-sa/4.0/ | |
| // See page 125. | |
| // Findlinks2 does an HTTP GET on each URL, parses the | |
| // result as HTML, and prints the links within it. | |
| // | |
| // Usage: | |
| // findlinks2 url ... | |
| package main | |
| import ( | |
| "fmt" | |
| "net/http" | |
| "os" | |
| "golang.org/x/net/html" | |
| ) | |
| // visit appends to links each link found in n, and returns the result. | |
| func visit(links []string, n *html.Node) []string { | |
| if n.Type == html.ElementNode && n.Data == "a" { | |
| for _, a := range n.Attr { | |
| if a.Key == "href" { | |
| links = append(links, a.Val) | |
| } | |
| } | |
| } | |
| for c := n.FirstChild; c != nil; c = c.NextSibling { | |
| links = visit(links, c) | |
| } | |
| return links | |
| } | |
| //!+ | |
| func main() { | |
| for _, url := range os.Args[1:] { | |
| links, err := findLinks(url) | |
| if err != nil { | |
| fmt.Fprintf(os.Stderr, "findlinks2: %v\n", err) | |
| continue | |
| } | |
| for _, link := range links { | |
| fmt.Println(link) | |
| } | |
| } | |
| } | |
| // findLinks performs an HTTP GET request for url, parses the | |
| // response as HTML, and extracts and returns the links. | |
| func findLinks(url string) ([]string, error) { | |
| resp, err := http.Get(url) | |
| if err != nil { | |
| return nil, err | |
| } | |
| if resp.StatusCode != http.StatusOK { | |
| resp.Body.Close() | |
| return nil, fmt.Errorf("getting %s: %s", url, resp.Status) | |
| } | |
| doc, err := html.Parse(resp.Body) | |
| resp.Body.Close() | |
| if err != nil { | |
| return nil, fmt.Errorf("parsing %s as HTML: %v", url, err) | |
| } | |
| return visit(nil, doc), nil | |
| } | |
| //!- |