-
Notifications
You must be signed in to change notification settings - Fork 13
/
matching.go
74 lines (64 loc) · 1.96 KB
/
matching.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
package idmatch
import (
"github.com/src-d/eee-identity-matching/external"
simplegraph "gonum.org/v1/gonum/graph"
"gonum.org/v1/gonum/graph/simple"
"gonum.org/v1/gonum/graph/topo"
)
// node is the vertex type stored in the people graph built by ReducePeople.
// It pairs a person with the numeric identifier reported by ID.
// NOTE: ReducePeople constructs it with an unkeyed composite literal, so the
// field order must stay (Value, id).
type node struct {
// Value is the person carried by this vertex.
Value *Person
// id is the identifier returned by ID.
id int64
}
// ID returns the identifier stored in the node.
func (n node) ID() int64 {
	return n.id
}
// ReducePeople merges the identities together by following the fixed set of rules.
// 1. Run the external matching, if available.
// 2. Run the series of heuristics on those items which were left untouched in the list (everything
// in case of ext == nil, not found in case of ext != nil).
//
// The heuristics currently implemented are:
//   - people sharing an email which the blacklist does not report as popular are linked;
//   - people sharing a name (as rendered by String()) which the blacklist does not report as
//     popular are linked.
//
// Every connected component of the resulting graph is then passed to people.Merge.
//
// ext is accepted but not consulted yet (external matching is not implemented), and the
// function currently always returns nil.
func ReducePeople(people People, ext external.Matcher, blacklist Blacklist) error {
	// TODO(zurk): implement external matching
	peopleGraph := simple.NewUndirectedGraph()
	for index, person := range people {
		peopleGraph.AddNode(node{person, int64(index)})
	}

	// link connects current to the first node seen with the same key, or records current as
	// that first node when the key is new. A key repeated within a single person is skipped
	// because simple.UndirectedGraph.SetEdge panics when both endpoints have the same ID.
	link := func(seen map[string]simplegraph.Node, key string, current simplegraph.Node) {
		first, ok := seen[key]
		if !ok {
			seen[key] = current
			return
		}
		if first.ID() == current.ID() {
			return
		}
		peopleGraph.SetEdge(peopleGraph.NewEdge(first, current))
	}

	// Add edges by the same unpopular email
	email2id := make(map[string]simplegraph.Node)
	for index, person := range people {
		current := peopleGraph.Node(int64(index))
		for _, email := range person.Emails {
			if blacklist.isPopularEmail(email) {
				continue
			}
			link(email2id, email, current)
		}
	}

	// Add edges by the same unpopular name
	name2id := make(map[string]simplegraph.Node)
	for index, person := range people {
		current := peopleGraph.Node(int64(index))
		for _, name := range person.NamesWithRepos {
			nameKey := name.String()
			if blacklist.isPopularName(nameKey) {
				continue
			}
			link(name2id, nameKey, current)
		}
	}

	// Each connected component is one identity; single-node components are passed to Merge too.
	for _, component := range topo.ConnectedComponents(peopleGraph) {
		toMerge := make([]uint64, 0, len(component))
		for _, member := range component {
			toMerge = append(toMerge, uint64(member.ID()))
		}
		people.Merge(toMerge...)
	}
	return nil
}