Permalink
Cannot retrieve contributors at this time
Fetching contributors…
| // Copyright © 2016 Alan A. A. Donovan & Brian W. Kernighan. | |
| // License: https://creativecommons.org/licenses/by-nc-sa/4.0/ | |
| // See page 243. | |
| // Crawl3 crawls web links starting with the command-line arguments. | |
| // | |
| // This version uses bounded parallelism. | |
| // For simplicity, it does not address the termination problem. | |
| // | |
| package main | |
| import ( | |
| "fmt" | |
| "log" | |
| "os" | |
| "gopl.io/ch5/links" | |
| ) | |
| func crawl(url string) []string { | |
| fmt.Println(url) | |
| list, err := links.Extract(url) | |
| if err != nil { | |
| log.Print(err) | |
| } | |
| return list | |
| } | |
| //!+ | |
| func main() { | |
| worklist := make(chan []string) // lists of URLs, may have duplicates | |
| unseenLinks := make(chan string) // de-duplicated URLs | |
| // Add command-line arguments to worklist. | |
| go func() { worklist <- os.Args[1:] }() | |
| // Create 20 crawler goroutines to fetch each unseen link. | |
| for i := 0; i < 20; i++ { | |
| go func() { | |
| for link := range unseenLinks { | |
| foundLinks := crawl(link) | |
| go func() { worklist <- foundLinks }() | |
| } | |
| }() | |
| } | |
| // The main goroutine de-duplicates worklist items | |
| // and sends the unseen ones to the crawlers. | |
| seen := make(map[string]bool) | |
| for list := range worklist { | |
| for _, link := range list { | |
| if !seen[link] { | |
| seen[link] = true | |
| unseenLinks <- link | |
| } | |
| } | |
| } | |
| } | |
| //!- |