-
Notifications
You must be signed in to change notification settings - Fork 147
/
pipeline.go
205 lines (190 loc) · 5.56 KB
/
pipeline.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
package hercules
import (
"errors"
"fmt"
"io"
"os"
"bufio"
"gopkg.in/src-d/go-git.v4"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/object"
"gopkg.in/src-d/hercules.v2/toposort"
)
// PipelineItem is the interface which every analysis registered in a Pipeline
// must implement. Pipeline.Initialize() uses Name(), Provides() and Requires()
// to order the items; Pipeline.Run() feeds commits through Consume() and
// collects the Finalize() results.
type PipelineItem interface {
// Name returns the name of the analysis.
Name() string
// Provides returns the list of keys of reusable calculated entities.
// Other items may depend on them.
Provides() []string
// Requires returns the list of keys of needed entities which must be supplied in Consume().
Requires() []string
// Initialize prepares and resets the item. Consume() requires Initialize()
// to be called at least once beforehand. The argument is the repository
// which is going to be analysed.
Initialize(*git.Repository)
// Consume processes the next commit.
// deps contains the required entities which match Requires(). Besides, it always includes
// "commit" and "index".
// Returns the calculated entities which match Provides(); Pipeline.Run()
// panics if any of the keys listed in Provides() is missing from the result.
Consume(deps map[string]interface{}) (map[string]interface{}, error)
// Finalize returns the result of the analysis.
Finalize() interface{}
}
// Pipeline holds the registered PipelineItem-s together with the repository
// they analyse, and executes them over a sequence of commits in the order
// resolved by Initialize().
type Pipeline struct {
// OnProgress is the callback which is invoked in Run() to output its
// progress. The first argument is the number of processed commits and the
// second is the total number of commits. It may be nil, in which case
// progress is not reported.
OnProgress func(int, int)
// repository points to the analysed Git repository struct from go-git.
repository *git.Repository
// items are the registered analysers in the pipeline.
items []PipelineItem
// plan is the resolved execution sequence. It is filled by Initialize().
plan []PipelineItem
}
// NewPipeline creates a Pipeline bound to the given repository. The returned
// pipeline has no registered items and an empty execution plan.
func NewPipeline(repository *git.Repository) *Pipeline {
	pipeline := new(Pipeline)
	pipeline.repository = repository
	pipeline.items = []PipelineItem{}
	pipeline.plan = []PipelineItem{}
	return pipeline
}
// AddItem registers item in the pipeline. Registering an item which is
// already present is a no-op, so every item appears at most once.
func (pipeline *Pipeline) AddItem(item PipelineItem) {
	for pos := 0; pos < len(pipeline.items); pos++ {
		if pipeline.items[pos] == item {
			// Already registered - nothing to do.
			return
		}
	}
	pipeline.items = append(pipeline.items, item)
}
// RemoveItem unregisters item from the pipeline. Only the first matching
// entry is removed; if the item is not registered, nothing happens.
func (pipeline *Pipeline) RemoveItem(item PipelineItem) {
	registered := pipeline.items
	for pos := 0; pos < len(registered); pos++ {
		if registered[pos] != item {
			continue
		}
		// Close the gap by shifting the tail one position to the left.
		pipeline.items = append(registered[:pos], registered[pos+1:]...)
		return
	}
}
// Commits returns the critical path in the repository's history, ordered from
// the oldest commit to HEAD. The history is walked backwards starting at
// HEAD; whenever a commit has several parents (a merge), only the first one
// is followed.
//
// It panics if HEAD cannot be resolved or if reading a commit fails.
func (pipeline *Pipeline) Commits() []*object.Commit {
	repo := pipeline.repository
	ref, err := repo.Head()
	if err != nil {
		panic(err)
	}
	current, err := repo.CommitObject(ref.Hash())
	if err != nil {
		panic(err)
	}
	history := []*object.Commit{}
	for {
		history = append(history, current)
		// Step to the first parent; io.EOF means there are no parents left.
		current, err = current.Parents().Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			panic(err)
		}
	}
	// The walk collected the commits newest-first; flip them in place.
	for lo, hi := 0, len(history)-1; lo < hi; lo, hi = lo+1, hi-1 {
		history[lo], history[hi] = history[hi], history[lo]
	}
	return history
}
// Initialize resolves the execution order of the registered items and calls
// Initialize() on each of them with the pipeline's repository.
//
// The items and the entities they provide form a dependency graph: an item
// points to every entity it provides and every required entity points to the
// item which needs it. The topological order of that graph becomes the
// execution plan used by Run().
//
// The plan is rebuilt from scratch on every call, so Initialize() may be
// invoked again after items have been added or removed.
//
// It panics if the edge for a required entity cannot be added to the graph
// (reported as an unsatisfied dependency), if the graph cannot be
// topologically sorted, or if the resulting plan does not contain every
// registered item.
func (pipeline *Pipeline) Initialize() {
	graph := toposort.NewGraph()
	name2item := map[string]PipelineItem{}
	for index, item := range pipeline.items {
		// The index suffix keeps node names distinct for items sharing a name.
		name := fmt.Sprintf("%s_%d", item.Name(), index)
		graph.AddNode(name)
		name2item[name] = item
		for _, key := range item.Provides() {
			// Entity nodes carry their own suffix; only item nodes are
			// recorded in name2item.
			key += "_entity"
			graph.AddNode(key)
			graph.AddEdge(name, key)
		}
	}
	// Requirements are wired in a second pass, after all the provided
	// entities have been added as nodes.
	for index, item := range pipeline.items {
		name := fmt.Sprintf("%s_%d", item.Name(), index)
		for _, key := range item.Requires() {
			key += "_entity"
			if !graph.AddEdge(key, name) {
				panic(fmt.Sprintf("Unsatisfied dependency: %s -> %s", key, item.Name()))
			}
		}
	}
	strplan, ok := graph.Toposort()
	if !ok {
		panic("Failed to resolve pipeline dependencies.")
	}
	// Build into a fresh slice instead of appending to pipeline.plan: stale
	// entries from a previous Initialize() call would otherwise accumulate
	// and trip the consistency check below.
	plan := make([]PipelineItem, 0, len(pipeline.items))
	for _, key := range strplan {
		// Entity nodes are not in name2item and are skipped here.
		if item, found := name2item[key]; found {
			plan = append(plan, item)
		}
	}
	if len(plan) != len(pipeline.items) {
		panic("Internal pipeline dependency resolution error.")
	}
	pipeline.plan = plan
	for _, item := range pipeline.items {
		item.Initialize(pipeline.repository)
	}
}
// Run executes the pipeline.
//
// commits is a slice with the sequential commit history. It shall start from
// the root (ascending order).
//
// For every commit the items are invoked in the planned order. Each item
// receives the shared state map, which holds "commit", "index" and whatever
// the preceding items have provided for the same commit. OnProgress, if set,
// is called before each commit and once more after the last one.
//
// Returns the Finalize() result of every registered item keyed by the item.
// If an item fails in Consume(), the failure is printed to stderr and the
// error is returned with a nil map. Run panics if an item does not return one
// of the entities it declares in Provides().
func (pipeline *Pipeline) Run(commits []*object.Commit) (map[PipelineItem]interface{}, error) {
	report := pipeline.OnProgress
	if report == nil {
		// Fall back to a no-op so the loop below need not check for nil.
		report = func(int, int) {}
	}
	total := len(commits)
	for position, current := range commits {
		report(position, total)
		// The state is recreated for every commit; nothing leaks between commits.
		deps := map[string]interface{}{"commit": current, "index": position}
		for _, stage := range pipeline.plan {
			produced, err := stage.Consume(deps)
			if err != nil {
				fmt.Fprintf(os.Stderr, "%s failed on commit #%d %s\n",
					stage.Name(), position, current.Hash.String())
				return nil, err
			}
			// Publish the declared outputs for the items further in the plan.
			for _, entity := range stage.Provides() {
				value, present := produced[entity]
				if !present {
					panic(fmt.Sprintf("%s: Consume() did not return %s", stage.Name(), entity))
				}
				deps[entity] = value
			}
		}
	}
	report(total, total)
	summary := map[PipelineItem]interface{}{}
	for _, registered := range pipeline.items {
		summary[registered] = registered.Finalize()
	}
	return summary, nil
}
// LoadCommitsFromFile reads commit hashes, one per line, and resolves each of
// them in the given repository.
//
// path is the file to read; "-" stands for the standard input. Every line
// must hold a full hexadecimal commit hash. The commits are returned in the
// order in which they are listed.
//
// An error is returned if the file cannot be opened or read, if a line is too
// short to be a full hash, or if a commit cannot be found in the repository.
func LoadCommitsFromFile(path string, repository *git.Repository) ([]*object.Commit, error) {
	var file io.ReadCloser
	if path != "-" {
		var err error
		file, err = os.Open(path)
		if err != nil {
			return nil, err
		}
		defer file.Close()
	} else {
		file = os.Stdin
	}
	scanner := bufio.NewScanner(file)
	commits := []*object.Commit{}
	for scanner.Scan() {
		text := scanner.Text()
		hash := plumbing.NewHash(text)
		// The hash is a fixed-size byte array, so its own length can never
		// reveal bad input. A full hash takes two hex digits per byte, thus
		// the length of the text is validated instead.
		if len(text) < 2*len(hash) {
			return nil, errors.New("invalid commit hash " + text)
		}
		commit, err := repository.CommitObject(hash)
		if err != nil {
			return nil, err
		}
		commits = append(commits, commit)
	}
	// Scan() returns false on read errors and over-long lines as well as on
	// EOF; without this check such failures would silently yield a truncated
	// list of commits.
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	return commits, nil
}