/
crawler.go
52 lines (43 loc) · 1004 Bytes
/
crawler.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
package crawler
import (
"fmt"
"log"
"sync"
"time"
"github.com/ReanGD/go-web-search/content"
"github.com/ReanGD/go-web-search/proxy"
"github.com/uber-go/zap"
)
// showTotalTime prints msg followed by the wall-clock time elapsed
// since start, bracketed by newlines (used as a crude progress report).
func showTotalTime(msg string, start time.Time) {
	// time.Since(start) is the idiomatic form of time.Now().Sub(start).
	fmt.Printf("\n%s%v\n", msg, time.Since(start))
}
// Run crawls the given baseHosts, downloading up to cnt pages, and
// stores the results through the content DB worker. It returns nil on
// success (including the no-op case) and the first setup error otherwise.
func Run(logger zap.Logger, baseHosts []string, cnt int) error {
	now := time.Now()
	defer showTotalTime("Total time=", now)
	// Nothing to do for a non-positive page budget or an empty host list.
	if cnt <= 0 || len(baseHosts) == 0 {
		return nil
	}
	db, err := content.GetDBrw()
	if err != nil {
		// NOTE(review): errors are logged via stdlib log while a zap.Logger
		// is passed in — consider routing these through logger for consistency.
		log.Printf("ERROR: %s", err)
		return err
	}
	defer db.Close()
	workers := new(hostWorkers)
	err = workers.Init(db, logger, baseHosts, cnt)
	if err != nil {
		log.Printf("ERROR: %s", err)
		return err
	}
	// Defers run LIFO: wgDB.Wait fires before db.Close, so the DB worker
	// is guaranteed to finish before the database is closed.
	var wgDB sync.WaitGroup
	defer wgDB.Wait()
	// Buffered by cnt so crawler workers rarely block on the DB writer.
	chDB := make(chan *proxy.PageData, cnt)
	dbWorker := content.DBWorker{DB: db, ChDB: chDB}
	wgDB.Add(1)
	go dbWorker.Start(&wgDB)
	// Start blocks until all host workers are done.
	workers.Start(chDB)
	// We are the only sender; closing chDB tells the DB worker to exit.
	close(chDB)
	showTotalTime("Workers time=", now)
	return nil
}