This repository has been archived by the owner. It is now read-only.
Permalink
Browse files

join: Fix algorithm to strictly prioritize better matches

Previously, join used an overly greedy algorithm that could, depending
on the order of the input slice, create an 'okay' pair for an element
even if that element could also be involved in a 'perfect' pair. The
new algorithm does not necessarily return pairs in the same order as
the previous algorithm.
  • Loading branch information...
aegamesi authored and ejj committed Sep 24, 2017
1 parent cc1220f commit 2344e44ac42baa177519cf4cf7709e94125d5cc0
Showing with 86 additions and 40 deletions.
  1. +69 −39 join/join.go
  2. +17 −1 join/join_test.go
View
@@ -6,6 +6,7 @@ import (
"reflect"
"github.com/quilt/quilt/counter"
"sort"
)
var c = counter.New("Join")
@@ -25,60 +26,89 @@ type Pair struct {
//
// Matches are made in accordance with the provided `score` function. It takes a single
// element from `lSlice`, and a single element from `rSlice`, and computes a score
// suggesting their match preference. The algorithm prefers to match pairs with the
// score closest to zero (inclusive). Negative scores are never matched.
// representing the match priority. The algorithm strictly prioritizes lower scoring
// matches first, but negative scores are never matched. The algorithm does not minimize
// the total score of all matches.
func Join(lSlice, rSlice interface{}, score func(left, right interface{}) int) (
pairs []Pair, lonelyLefts, lonelyRights []interface{}) {
c.Inc("Join")
val := reflect.ValueOf(rSlice)
len := val.Len()
lonelyRights = make([]interface{}, 0, len)
for i := 0; i < len; i++ {
lonelyRights = append(lonelyRights, val.Index(i).Interface())
type scoredPair struct {
left int
right int
score int
}
val = reflect.ValueOf(lSlice)
len = val.Len()
Outer:
for i := 0; i < len; i++ {
l := val.Index(i).Interface()
bestScore := -1
bestIndex := -1
for i, r := range lonelyRights {
s := score(l, r)
switch {
case s < 0:
left := reflect.ValueOf(lSlice)
right := reflect.ValueOf(rSlice)
pairedLefts := map[int]struct{}{}
pairedRights := map[int]struct{}{}
scoredPairs := []scoredPair{}
pairs = []Pair{}
// Generate initial list of pairs.
OuterPairing:
for i := 0; i < left.Len(); i++ {
for j := 0; j < right.Len(); j++ {
if _, ok := pairedRights[j]; ok {
continue
case s == 0:
pairs = append(pairs, Pair{l, r})
lonelyRights = sliceDel(lonelyRights, i)
continue Outer
case s < bestScore || bestScore < 0:
bestIndex = i
bestScore = s
}
lVal := left.Index(i).Interface()
rVal := right.Index(j).Interface()
score := score(lVal, rVal)
if score == 0 {
// Pair immediately.
pairs = append(pairs, Pair{lVal, rVal})
pairedLefts[i] = struct{}{}
pairedRights[j] = struct{}{}
continue OuterPairing
} else if score > 0 {
scoredPairs = append(scoredPairs,
scoredPair{i, j, score})
}
}
}
if bestIndex >= 0 {
pairs = append(pairs, Pair{l, lonelyRights[bestIndex]})
lonelyRights = sliceDel(lonelyRights, bestIndex)
continue Outer
// Sort and collect 'best' pairs.
sort.SliceStable(scoredPairs, func(i, j int) bool {
return scoredPairs[i].score < scoredPairs[j].score
})
for _, scoredPair := range scoredPairs {
if len(pairedLefts) == left.Len() || len(pairedRights) == right.Len() {
break
}
if _, ok := pairedLefts[scoredPair.left]; ok {
continue
}
if _, ok := pairedRights[scoredPair.right]; ok {
continue
}
lonelyLefts = append(lonelyLefts, l)
lVal := left.Index(scoredPair.left).Interface()
rVal := right.Index(scoredPair.right).Interface()
pairs = append(pairs, Pair{lVal, rVal})
pairedLefts[scoredPair.left] = struct{}{}
pairedRights[scoredPair.right] = struct{}{}
}
return pairs, lonelyLefts, lonelyRights
}
// Collect unpaired elements. Iterating over the original slices in index
// order ensures that lonelyLefts/lonelyRights are returned in a consistent order.
lonelyLefts = make([]interface{}, 0, left.Len()-len(pairedLefts))
lonelyRights = make([]interface{}, 0, right.Len()-len(pairedRights))
for i := 0; i < left.Len(); i++ {
if _, ok := pairedLefts[i]; !ok {
lonelyLefts = append(lonelyLefts, left.Index(i).Interface())
}
}
for i := 0; i < right.Len(); i++ {
if _, ok := pairedRights[i]; !ok {
lonelyRights = append(lonelyRights, right.Index(i).Interface())
}
}
func sliceDel(slice []interface{}, i int) []interface{} {
l := len(slice)
slice[i] = slice[l-1]
slice[l-1] = nil // Allow garbage collection.
return slice[:l-1]
return pairs, lonelyLefts, lonelyRights
}
// List simply requires implementing types to allow access to their contained values by
View
@@ -21,6 +21,22 @@ func TestJoin(t *testing.T) {
assert.Equal(t, []interface{}{12}, left)
assert.Equal(t, []interface{}{13}, right)
assert.Equal(t, []Pair{{10, 2}, {11, 1}}, pairs)
pairs, left, right = Join([]int{13, 14, 15}, []int{8, 9, 10, 11, 12}, score)
assert.Zero(t, len(left))
assert.Equal(t, []interface{}{8, 9}, right)
assert.Equal(t, []Pair{{13, 12}, {14, 11}, {15, 10}}, pairs)
}
// TestJoinNotGreedy checks that Join does not settle for a worse match when a
// better one exists. With score = right - left, left element 10 scores 1 against
// right element 11, but 11 and 12 each have a zero-score partner on the right.
// The expected result contains both zero-score pairs first, then 10 paired with
// 13, and no unpaired elements on either side.
func TestJoinNotGreedy(t *testing.T) {
// Score is the signed difference right - left; it is negative whenever the
// right element is smaller than the left one.
score := func(left, right interface{}) int {
return right.(int) - left.(int)
}
pairs, left, right := Join([]int{10, 11, 12}, []int{11, 12, 13}, score)
// Every element should end up in some pair.
assert.Zero(t, len(left))
assert.Zero(t, len(right))
// The order matters: the two zero-score pairs precede the score-3 pair.
assert.Equal(t, []Pair{{11, 11}, {12, 12}, {10, 13}}, pairs)
}
type JoinList []interface{}
@@ -95,5 +111,5 @@ func ExampleJoin() {
pairs, lonelyLefts, lonelyRights := Join(lefts, rights, score)
fmt.Println(pairs, lonelyLefts, lonelyRights)
// Output: [{a 0} {bc 2}] [def] [4]
// Output: [{bc 2} {a 0}] [def] [4]
}

0 comments on commit 2344e44

Please sign in to comment.