-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcrawler.go
94 lines (74 loc) · 2.02 KB
/
crawler.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
package main
import (
"context"
"github.com/chromedp/chromedp"
"sync"
"time"
"github.com/GlidingTracks/gt-crawler/auth"
"github.com/GlidingTracks/gt-crawler/chrome"
"github.com/GlidingTracks/gt-crawler/sites"
jConfigGo "github.com/MarkusAJacobsen/jConfig-go"
"github.com/Sirupsen/logrus"
)
// init configures the global logrus logger before main runs, lowering the
// threshold so that debug-level entries are emitted.
func init() {
	// Structured JSON output to stdout is currently switched off:
	//   logrus.SetFormatter(&logrus.JSONFormatter{})
	//   logrus.SetOutput(os.Stdout)
	logrus.SetLevel(logrus.DebugLevel)
}
// main runs a single crawl-and-upload cycle: it loads the persisted state
// configuration, starts a Chrome exec allocator, crawls for links and hands
// the collected links to upload.
func main() {
	logrus.Info("Starting crawler")

	// The two-hour deadline is inherited by the browser allocator context and
	// is also the context passed to upload.
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Hour)
	defer cancel()

	conf, err := getConfig()
	if err != nil {
		// Fatal terminates the process after logging, so attach the underlying
		// error here; otherwise the reason for the failure is lost.
		logrus.WithError(err).Fatal("Could not get config, storing result locally")
		// CHECK STORAGE AND UPLOAD RESIDUALS
	}

	// Cancel headless mode and open the browser GUI. These options are appended
	// after chromedp's defaults (presumably so they override them; confirm
	// against the chromedp allocator documentation).
	options := []chromedp.ExecAllocatorOption{
		chromedp.Flag("headless", false),
		chromedp.Flag("hide-scrollbars", false),
		chromedp.Flag("mute-audio", false),
		chromedp.UserAgent(`Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36`),
	}
	options = append(chromedp.DefaultExecAllocatorOptions[:], options...)

	c, cc := chromedp.NewExecAllocator(ctx, options...)
	defer cc()

	var wg sync.WaitGroup

	// crawl blocks until it has a result, so links are complete before upload.
	links := crawl(c, &wg)
	upload(ctx, conf, links)

	wg.Wait()
}
// crawl starts a Chrome-driven crawl of the XContest site in a separate
// goroutine and returns the links delivered on the result channel.
//
// ctx is passed to the crawler and also bounds the wait for its result: if
// ctx is done before a result arrives, a warning is logged and nil is
// returned. wg is incremented for the duration of the call and released when
// crawl returns.
func crawl(ctx context.Context, wg *sync.WaitGroup) (links []string) {
	// Register with the WaitGroup before scheduling the matching Done so the
	// counter can never be decremented ahead of its increment.
	wg.Add(1)
	defer wg.Done()

	c := &chrome.Chrome{}
	cSites := []sites.ChromeSite{&sites.XContestChrome{}}
	crawlRes := make(chan []string)

	go c.Crawl(ctx, cSites, crawlRes)

	select {
	case links = <-crawlRes:
		// NOTE(review): closing from the receiving side assumes Crawl sends
		// exactly one result; confirm against chrome.Chrome.Crawl.
		close(crawlRes)
	case <-ctx.Done():
		// Leave the channel open: the crawler goroutine may still attempt to
		// send, and a send on a closed channel would panic.
		logrus.WithError(ctx.Err()).Warn("Crawl aborted before any links were received")
	}
	return links
}
// upload sends the crawled links through an Upload configured with Firebase
// style auth (auth.FAuth) and logs the outcome.
//
// An empty links slice is reported and skipped. A success message is logged
// only when UploadLinks reports true; nothing is logged here otherwise.
func upload(ctx context.Context, conf State, links []string) {
	if len(links) < 1 {
		logrus.Info("No links uploaded, empty input")
		return
	}

	uploader := &Upload{Auth: auth.FAuth{}}
	if ok := uploader.UploadLinks(ctx, links, conf); !ok {
		return
	}
	logrus.Info("Links uploaded")
}
// getConfig creates the jConfig configuration named "state" and reads it into
// a State value.
//
// If the configuration cannot be created, the zero State and that error are
// returned. Otherwise the State filled by Get is returned together with
// whatever error Get reported.
func getConfig() (state State, err error) {
	cfg := jConfigGo.Config{}

	if err = cfg.CreateConfig("state"); err != nil {
		return State{}, err
	}

	// state is still its zero value at this point, so Get fills a fresh State.
	err = cfg.Get(&state)
	return state, err
}