-
Notifications
You must be signed in to change notification settings - Fork 6
/
server.go
119 lines (97 loc) · 2.36 KB
/
server.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
// Copyright 2017 The StudyGolang Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// http://studygolang.com
// Author: polaris polaris@studygolang.com
// Package server makes it optional to embed the indexer and crawler in the main program at startup, which reduces memory usage.
package server
import (
"flag"
"fmt"
"os"
"time"
"sander/config"
"sander/logger"
"sander/logic"
"github.com/robfig/cron"
)
// usageStr is the migrator help text. It is registered as the usage string of
// the -changeVersion flag and printed by MigratorServer when that flag is
// left empty.
var usageStr = `
Usage: migrator [options]
Options:
--changeVersion <version> changeset version(1.0)
`

// Command-line flags read by the servers in this file.
var (
	// manualIndex makes IndexingServer run one full indexing pass at startup.
	manualIndex = flag.Bool("manual", false, "do manual index once or not")
	// needAll makes the crawler start a full crawl at startup; off by default.
	needAll = flag.Bool("all", false, "是否需要全量抓取,默认否")
	// whichSite restricts the startup full crawl to a single site; empty means
	// all sites. It is only consulted when needAll is set.
	whichSite = flag.String("site", "", "抓取哪个站点(空表示所有站点)")
	// changeVersion is the changeset version handed to the migrator.
	changeVersion = flag.String("changeVersion", "", usageStr)
)
// IndexingServer parses the command-line flags if they have not been parsed
// yet, runs one full indexing pass right away when -manual is set, and then
// starts a cron scheduler that rebuilds the index data needed by solr.
func IndexingServer() {
	if parsed := flag.Parsed(); !parsed {
		flag.Parse()
	}

	if runNow := *manualIndex; runNow {
		indexing(true)
	}

	incremental := func() { indexing(false) }
	full := func() { indexing(true) }

	scheduler := cron.New()
	// Incremental indexing once a minute.
	scheduler.AddFunc("@every 1m", incremental)
	// Full indexing once a day.
	scheduler.AddFunc("@daily", full)
	scheduler.Start()
}
// indexing runs one indexing pass through logic.DefaultSearcher and logs how
// long it took. isAll is forwarded unchanged: callers in this file pass true
// for a full pass and false for an incremental one.
func indexing(isAll bool) {
	logger.Info("indexing start...")

	start := time.Now()
	// The closure defers the time.Since call until the function returns, so
	// the logged duration covers the whole Indexing call.
	defer func() {
		logger.Info("indexing spend time:", time.Since(start))
	}()

	logic.DefaultSearcher.Indexing(isAll)
}
// CrawlServer .
func CrawlServer() {
if !flag.Parsed() {
flag.Parse()
}
go autocrawl(*needAll, *whichSite)
}
// autocrawl optionally starts a one-off full crawl and then schedules the
// recurring incremental crawl.
//
// When needAll is true, a crawl is started in a background goroutine: of the
// single site named by whichSite, or via DoCrawl when whichSite is empty.
// When needAll is false, whichSite is ignored.
func autocrawl(needAll bool, whichSite string) {
	switch {
	case !needAll:
		// No startup crawl requested.
	case whichSite == "":
		go logic.DefaultAutoCrawl.DoCrawl(needAll)
	default:
		go logic.DefaultAutoCrawl.CrawlWebsite(whichSite, needAll)
	}

	// Scheduled incremental crawl. The cron spec is read from the "spec" key
	// of the "crawl" config section, with "0 0 */1 * * ?" passed as the third
	// argument (presumably the fallback value).
	scheduler := cron.New()
	spec := config.ConfigFile.MustValue("crawl", "spec", "0 0 */1 * * ?")
	incremental := func() {
		// Crawl reddit.
		go logic.DefaultReddit.Parse("")

		// Crawl the project list, but only when a project URL is configured.
		if listURL := config.ConfigFile.MustValue("crawl", "project_url"); listURL != "" {
			go logic.DefaultProject.ParseProjectList(listURL)
		}

		// Crawl articles.
		go logic.DefaultAutoCrawl.DoCrawl(false)
	}
	scheduler.AddFunc(spec, incremental)
	scheduler.Start()
}
// MigratorServer parses the command-line flags if they have not been parsed
// yet and runs the migrator for the version given by -changeVersion. When
// that flag is empty it prints the usage text to standard output and exits
// the process with status 1.
func MigratorServer() {
	if parsed := flag.Parsed(); !parsed {
		flag.Parse()
	}

	version := *changeVersion
	if version == "" {
		fmt.Println(usageStr)
		os.Exit(1)
	}

	logic.DefaultMigrator.Migrator(version)
}