Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
module github.com/google/zoekt

require (
github.com/RoaringBitmap/roaring v0.4.18
github.com/andygrunwald/go-gerrit v0.0.0-20181026193842-43cfd7a94eb4
github.com/fsnotify/fsnotify v1.4.7
github.com/gfleury/go-bitbucket-v1 v0.0.0-20181102191809-4910839b609e
Expand Down
22 changes: 22 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
github.com/RoaringBitmap/roaring v0.4.18 h1:nh8Ngxctxt5QAoMLuR7MHJe4jEqpn+EnsdgDWPryQWo=
github.com/RoaringBitmap/roaring v0.4.18/go.mod h1:D3qVegWTmfCaX4Bl5CrBE9hfrSrrXIr8KVNvRsDi1NI=
github.com/alcortesm/tgz v0.0.0-20161220082320-9c5fe88206d7 h1:uSoVVbwJiQipAclBbw+8quDsfcvFjOpI5iCf4p/cqCs=
github.com/alcortesm/tgz v0.0.0-20161220082320-9c5fe88206d7/go.mod h1:6zEj6s6u/ghQa61ZWa/C2Aw3RkjiTBOix7dkqa1VLIs=
github.com/andygrunwald/go-gerrit v0.0.0-20181026193842-43cfd7a94eb4 h1:Nu1m/Uyela0+Z41Ajo78sa1mf18CzLztk8JdNyeL5uk=
github.com/andygrunwald/go-gerrit v0.0.0-20181026193842-43cfd7a94eb4/go.mod h1:0iuRQp6WJ44ts+iihy5E/WlPqfg5RNeQxOmzRkxCdtk=
github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239 h1:kFOfPq6dUM1hTo4JG6LR5AXSUEsOjtdm0kw0FtQtMJA=
github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239/go.mod h1:2FmKhYUyUczH0OGQWaF5ceTx0UBShxjsH6f8oGKYe2c=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/emirpasic/gods v1.9.0 h1:rUF4PuzEjMChMiNsVjdI+SyLu7rEqpQ5reNFnhC7oFo=
Expand All @@ -16,8 +19,13 @@ github.com/gfleury/go-bitbucket-v1 v0.0.0-20181102191809-4910839b609e h1:EN5E7Dm
github.com/gfleury/go-bitbucket-v1 v0.0.0-20181102191809-4910839b609e/go.mod h1:Se0U4YUmRkRAOh8kD7KXz+3VCUBmvTFcdWP2QYYRjjc=
github.com/gliderlabs/ssh v0.1.1 h1:j3L6gSLQalDETeEg/Jg0mGY0/y/N6zI2xX1978P0Uqw=
github.com/gliderlabs/ssh v0.1.1/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0=
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2 h1:Ujru1hufTHVb++eG6OuNDKMxZnGIvF6o/u8q/8h2+I4=
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE=
github.com/glycerine/goconvey v0.0.0-20180728074245-46e3a41ad493/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24=
github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-github v17.0.0+incompatible h1:N0LgJ1j65A7kfXrZnUDaYCs/Sf4rEjNlfyDHW9dolSY=
Expand All @@ -26,13 +34,15 @@ github.com/google/go-querystring v1.0.0 h1:Xkwi/a1rcvNg1PPYe5vI8GbeBY/jrVuDX5ASu
github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck=
github.com/google/slothfs v0.0.0-20170112234537-ecdd255f653d h1:ADHffp2KLaMypb4pG5pBJ8AezYRvGxQQ8vnH0E1K04c=
github.com/google/slothfs v0.0.0-20170112234537-ecdd255f653d/go.mod h1:kzvK/MFjZSNdFgc1tCZML3E1nVvnB4/npSKEuvMoECU=
github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
github.com/hashicorp/go-cleanhttp v0.5.0 h1:wvCrVc9TjDls6+YGAF2hAifE1E5U1+b4tH6KdvN3Gig=
github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=
github.com/hashicorp/go-retryablehttp v0.5.1 h1:Vsx5XKPqPs3M6sM4U4GWyUqFS8aBiL9U5gkgvpkg4SE=
github.com/hashicorp/go-retryablehttp v0.5.1/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs=
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A=
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo=
github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
github.com/keegancsmith/rpc v1.1.0 h1:bXVRk3EzbtrEegTGKxNTc+St1lR7t/Z1PAO8misBnCc=
github.com/keegancsmith/rpc v1.1.0/go.mod h1:Xow74TKX34OPPiPCdz6x1o9c0SCxRqGxDuKGk7ZOo8s=
github.com/kevinburke/ssh_config v0.0.0-20180830205328-81db2a75821e h1:RgQk53JHp/Cjunrr1WlsXSZpqXn+uREuHvUVcK82CV8=
Expand All @@ -48,18 +58,30 @@ github.com/mitchellh/go-homedir v1.0.0 h1:vKb8ShqSby24Yrqr/yDYkuFz8d0WUjys40rvnG
github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/mitchellh/mapstructure v1.1.2 h1:fmNYVwqnSfB9mZU6OS2O6GsXM+wcskZDuKQzvN1EDeE=
github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae h1:VeRdUYdCw49yizlSbMEn2SZ+gT+3IUKx8BqxyQdz+BY=
github.com/mschoch/smat v0.0.0-20160514031455-90eadee771ae/go.mod h1:qAyveg+e4CE+eKJXWVjKXM4ck2QobLqTDytGJbLLhJg=
github.com/pelletier/go-buffruneio v0.2.0 h1:U4t4R6YkofJ5xHm3dJzuRpPZ0mr5MMCoAWooScCR7aA=
github.com/pelletier/go-buffruneio v0.2.0/go.mod h1:JkE26KsDizTr40EUHkXVtNPvgGtbSNq5BcowyYOWdKo=
github.com/philhofer/fwd v1.0.0 h1:UbZqGr5Y38ApvM/V/jEljVxwocdweyH+vmYvRPBnbqQ=
github.com/philhofer/fwd v1.0.0/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU=
github.com/pkg/errors v0.8.0 h1:WdK/asTD0HN+q6hsWO3/vpuAkAr+tw6aNJNDFFf0+qw=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/sergi/go-diff v1.0.0 h1:Kpca3qRNrduNnOQeazBd0ysaKrUJiIuISHxogkT9RPQ=
github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/goconvey v0.0.0-20190306220146-200a235640ff/go.mod h1:KSQcGKpxUMHk3nbYzs/tIBAM2iDooCn0BmttHOJEbLs=
github.com/src-d/gcfg v1.4.0 h1:xXbNR5AlLSA315x2UO+fTSSAXCDf+Ar38/6oyGbDKQ4=
github.com/src-d/gcfg v1.4.0/go.mod h1:p/UMsR43ujA89BJY9duynAwIpvqEujIH/jFlfL7jWoI=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/tinylib/msgp v1.1.0 h1:9fQd+ICuRIu/ue4vxJZu6/LzxN0HwMds2nq/0cFvxHU=
github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE=
github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc=
github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4=
github.com/xanzy/go-gitlab v0.13.0 h1:vBxlISwRackWHqZb4IaMDycTrlfJ0918ZlpZjL20Zyk=
github.com/xanzy/go-gitlab v0.13.0/go.mod h1:8zdQa/ri1dfn8eS3Ir1SyfvOKlw7WBJ8DVThkpGiXrs=
github.com/xanzy/ssh-agent v0.2.0 h1:Adglfbi5p9Z0BmK2oKU9nTG+zKfniSfnaMYB+ULd+Ro=
Expand Down
63 changes: 36 additions & 27 deletions hititer.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@
package zoekt

import (
"encoding/binary"
"fmt"

"github.com/RoaringBitmap/roaring"
)

// hitIterator finds potential search matches, measured in offsets of
Expand Down Expand Up @@ -133,8 +134,8 @@ func (d *indexData) trigramHitIterator(ng ngram, caseSensitive, fileName bool) (
if err != nil {
return nil, err
}
if len(blob) > 0 {
iters = append(iters, newCompressedPostingIterator(blob, v))
if len(blob) > 0 { // why this check?
iters = append(iters, newBitmapIterator(blob, v))
}
}

Expand Down Expand Up @@ -176,53 +177,61 @@ func (i *inMemoryIterator) next(limit uint32) {
}
}

// compressedPostingIterator goes over a delta varint encoded posting
// list.
type compressedPostingIterator struct {
blob, orig []byte
_first uint32
what ngram
// bitmapIterator goes over a posting list encoded as a roaring bitmap.
type bitmapIterator struct {
// it yields the values of the bitmap decoded in newBitmapIterator.
it roaring.IntIterable
// _first is the current head of the posting list as reported by
// first(); it is maxUInt32 once the iterator has nothing left.
_first uint32
// size is the length in bytes of the encoded blob, used by updateStats.
size uint32
// what is the ngram these postings belong to; shown by String.
what ngram
}

func newCompressedPostingIterator(b []byte, w ngram) *compressedPostingIterator {
d, sz := binary.Uvarint(b)
return &compressedPostingIterator{
_first: uint32(d),
blob: b[sz:],
orig: b,
// newBitmapIterator decodes blob as a roaring bitmap holding the posting
// list of ngram w and returns an iterator positioned on its first value.
// If the bitmap yields no values, first() reports maxUInt32.
func newBitmapIterator(blob []byte, w ngram) *bitmapIterator {
	bm := roaring.New()
	// NOTE(review): the values returned by FromBuffer are discarded. If it
	// reports decoding errors (check the roaring documentation), a corrupt
	// blob goes unnoticed here because this constructor cannot return one.
	bm.FromBuffer(blob)

	iter := &bitmapIterator{
		it:     bm.Iterator(),
		_first: uint32(maxUInt32),
		size:   uint32(len(blob)),
		what:   w,
	}
	// Prime the iterator so that first() is valid immediately.
	if iter.it.HasNext() {
		iter._first = iter.it.Next()
	}
	return iter
}

func (i *compressedPostingIterator) String() string {
return fmt.Sprintf("compressed(%s, %d, [%d bytes])", i.what, i._first, len(i.blob))
// String returns a short description of the iterator naming its ngram.
func (i *bitmapIterator) String() string {
return fmt.Sprintf("bitmap(%s)", i.what)
}

func (i *compressedPostingIterator) first() uint32 {
// first returns the current head of the posting list; the iterator code
// stores maxUInt32 here when it has no further values.
func (i *bitmapIterator) first() uint32 {
return i._first
}

func (i *compressedPostingIterator) next(limit uint32) {
func (i *bitmapIterator) next(limit uint32) {
if limit == maxUInt32 {
i.blob = nil
i._first = maxUInt32
return
}

if i._first <= limit && len(i.blob) == 0 {
hasNext := i.it.HasNext()

if i._first <= limit && !hasNext {
i._first = maxUInt32
return
}

for i._first <= limit && len(i.blob) > 0 {
delta, sz := binary.Uvarint(i.blob)
i._first += uint32(delta)
i.blob = i.blob[sz:]
for i._first <= limit && hasNext {
i._first = i.it.Next()
}
}

func (i *compressedPostingIterator) updateStats(s *Stats) {
s.IndexBytesLoaded += int64(len(i.orig) - len(i.blob))
// updateStats adds the byte length of the encoded posting list blob to
// s.IndexBytesLoaded, regardless of how far the iterator has advanced.
func (i *bitmapIterator) updateStats(s *Stats) {
// TODO keegan confirm that bitmap.FromBuffer scans the whole buffer
s.IndexBytesLoaded += int64(i.size)
}

// mergingIterator forms the merge of a set of hitIterators, to
Expand Down
28 changes: 24 additions & 4 deletions indexbuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ import (
"path/filepath"
"sort"
"unicode/utf8"

"github.com/RoaringBitmap/roaring"
)

var _ = log.Println
Expand All @@ -42,6 +44,7 @@ const runeOffsetFrequency = 100

type postingsBuilder struct {
postings map[ngram][]byte
postingSets map[ngram]*roaring.Bitmap
lastOffsets map[ngram]uint32

// To support UTF-8 searching, we must map back runes to byte
Expand All @@ -65,6 +68,14 @@ func newPostingsBuilder() *postingsBuilder {
}
}

// newPostingSetsBuilder returns a postingsBuilder that collects postings
// in roaring bitmaps (postingSets). The postings map is left nil, which is
// what newSearchableString checks to select the bitmap path.
func newPostingSetsBuilder() *postingsBuilder {
	pb := new(postingsBuilder)
	pb.postingSets = make(map[ngram]*roaring.Bitmap)
	pb.lastOffsets = make(map[ngram]uint32)
	pb.isPlainASCII = true
	return pb
}

// Store trigram offsets for the given UTF-8 data. The
// DocumentSections must correspond to rune boundaries in the UTF-8
// data.
Expand Down Expand Up @@ -111,11 +122,20 @@ func (s *postingsBuilder) newSearchableString(data []byte, byteSections []Docume
}

ng := runesToNGram(runeGram)
lastOff := s.lastOffsets[ng]
newOff := endRune + uint32(runeIndex) - 2

m := binary.PutUvarint(buf[:], uint64(newOff-lastOff))
s.postings[ng] = append(s.postings[ng], buf[:m]...)
if s.postings == nil {
b := s.postingSets[ng]
if b == nil {
b = roaring.New()
s.postingSets[ng] = b
}
b.Add(newOff)
} else {
lastOff := s.lastOffsets[ng]
m := binary.PutUvarint(buf[:], uint64(newOff-lastOff))
s.postings[ng] = append(s.postings[ng], buf[:m]...)
}
s.lastOffsets[ng] = newOff
}
s.runeCount += runeIndex
Expand Down Expand Up @@ -193,7 +213,7 @@ func (b *IndexBuilder) ContentSize() uint32 {
// Repository contains repo metadata, and may be set to nil.
func NewIndexBuilder(r *Repository) (*IndexBuilder, error) {
b := &IndexBuilder{
contentPostings: newPostingsBuilder(),
contentPostings: newPostingSetsBuilder(),
namePostings: newPostingsBuilder(),
languageMap: map[string]byte{},
}
Expand Down
3 changes: 2 additions & 1 deletion toc.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ package zoekt
// 13: content checksums
// 14: languages
// 15: rune based symbol sections
const IndexFormatVersion = 15
// 16: roaring bitmaps for posting lists
const IndexFormatVersion = 16

// FeatureVersion is increased if a feature is added that requires reindexing data
// without changing the format version
Expand Down
6 changes: 6 additions & 0 deletions vendor/github.com/RoaringBitmap/roaring/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Empty file.
32 changes: 32 additions & 0 deletions vendor/github.com/RoaringBitmap/roaring/.travis.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions vendor/github.com/RoaringBitmap/roaring/AUTHORS

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions vendor/github.com/RoaringBitmap/roaring/CONTRIBUTORS

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading