Skip to content

Commit

Permalink
internal/code: Rediscover only within the affected repository.
Browse files Browse the repository at this point in the history
This change is an optimization for code.Service.Rediscover method.
Instead of always rediscovering the entire store, limit scope to
just the affected repository.

Time to Rediscover with sample test data went from 13 ms to 6 ms.

This optimization will have a significant impact when the entire
repository store is very large compared to the single repository
being updated.
  • Loading branch information
dmitshur committed Nov 4, 2018
1 parent 5317c0d commit df88f51
Show file tree
Hide file tree
Showing 4 changed files with 205 additions and 21 deletions.
84 changes: 68 additions & 16 deletions internal/code/code.go
Expand Up @@ -126,45 +126,97 @@ func (s *Service) CreateRepo(ctx context.Context, repoSpec, description string)
func insertDir(s *[]*Directory, dir *Directory) {
// Use binary search to find index where dir should be inserted,
// and insert it directly there.
i := sort.Search(len(*s), func(i int) bool { return (*s)[i].ImportPath >= dir.ImportPath })
i := sort.Search(len(*s), func(i int) bool { return (*s)[i].RepoRoot >= dir.RepoRoot })
*s = append(*s, nil)
copy((*s)[i+1:], (*s)[i:])
(*s)[i] = dir
}

// Rediscover rediscovers all code in the repository store.
// Rediscover rediscovers code in repoRoot of the repository store.
// It returns packages that have been added and removed.
func (s *Service) Rediscover() (added, removed []*Directory, err error) {
// TODO: Can optimize this by rediscovering selectively (only the affected repo and its parent dirs).
dirs, byImportPath, err := discover(s.reposDir)
func (s *Service) Rediscover(repoRoot string) (added, removed []*Directory, err error) {
gitDir := filepath.Join(s.reposDir, filepath.FromSlash(repoRoot))
newDirs, err := walkRepository(gitDir, repoRoot)
if err != nil {
return nil, nil, err
}

s.mu.Lock()
oldDirs := s.dirs
oldByImportPath := s.byImportPath
s.dirs = dirs
s.byImportPath = byImportPath
oldDirs := replaceDirs(&s.dirs, repoRoot, newDirs)
replaceDirsMap(s.byImportPath, oldDirs, newDirs)
populateLicenseRoot(newDirs, s.byImportPath)
s.mu.Unlock()

// Compute added, removed packages.
for _, d := range dirs {
if d.Package != nil && !dirExistsAndHasPackage(oldByImportPath[d.ImportPath]) {
added = append(added, d)
for _, d := range newDirs {
if d.Package == nil || containsPackage(oldDirs, d.ImportPath) {
continue
}
added = append(added, d)
}
for _, d := range oldDirs {
if d.Package != nil && !dirExistsAndHasPackage(byImportPath[d.ImportPath]) {
removed = append(removed, d)
if d.Package == nil || containsPackage(newDirs, d.ImportPath) {
continue
}
removed = append(removed, d)
}

return added, removed, nil
}

// dirExistsAndHasPackage reports whether dir exists and contains a Go package.
func dirExistsAndHasPackage(dir *Directory) bool { return dir != nil && dir.Package != nil }
// replaceDirs replaces directories with repoRoot in the sorted slice s
// with newDirs, keeping the slice sorted. It returns old directories that got replaced.
func replaceDirs(s *[]*Directory, repoRoot string, newDirs []*Directory) (oldDirs []*Directory) {
// Use binary search to find index where directories should be replaced,
// and replace them directly there.
// i is the start index of old directories, and j is the end index.
i := sort.Search(len(*s), func(i int) bool { return (*s)[i].RepoRoot >= repoRoot })
old := 0 // Number of old directories to replace.
for i+old < len(*s) && (*s)[i+old].RepoRoot == repoRoot {
old++
}
j := i + old

// Make a copy of old directories before they're overwritten.
oldDirs = make([]*Directory, old)
copy(oldDirs, (*s)[i:j])

// Grow/shrink the slice by delta, and copy new directories into place.
switch delta := len(newDirs) - len(oldDirs); {
case delta > 0:
// Grow s by delta.
*s = append(*s, make([]*Directory, delta)...)
copy((*s)[j+delta:], (*s)[j:])
case delta < 0:
// Shrink s by delta.
copy((*s)[j+delta:], (*s)[j:])
copy((*s)[len(*s)+delta:], make([]*Directory, -delta))
*s = (*s)[:len(*s)+delta]
}
copy((*s)[i:], newDirs)

return oldDirs
}

func replaceDirsMap(m map[string]*Directory, oldDirs, newDirs []*Directory) {
for _, d := range oldDirs {
delete(m, d.ImportPath)
}
for _, d := range newDirs {
m[d.ImportPath] = d
}
}

// containsPackage reports whether dirs contains a Go package with matching importPath.
func containsPackage(dirs []*Directory, importPath string) bool {
for _, d := range dirs {
if d.ImportPath != importPath {
continue
}
return d.Package != nil
}
return false
}

// Directory represents a directory inside a repository store.
type Directory struct {
Expand Down
10 changes: 6 additions & 4 deletions internal/code/discover.go
Expand Up @@ -32,9 +32,13 @@ func discover(reposDir string) ([]*Directory, map[string]*Directory, error) {
for _, d := range dirs {
byImportPath[d.ImportPath] = d
}
populateLicenseRoot(dirs, byImportPath)
return dirs, byImportPath, nil
}

// Populate LicenseRoot values for remaining directories
// that don't directly contain a LICENSE file.
// populateLicenseRoot populates LicenseRoot values for
// directories that don't directly contain a LICENSE file.
func populateLicenseRoot(dirs []*Directory, byImportPath map[string]*Directory) {
for _, dir := range dirs {
if dir.HasLicenseFile() {
continue
Expand All @@ -48,8 +52,6 @@ func discover(reposDir string) ([]*Directory, map[string]*Directory, error) {
}
}
}

return dirs, byImportPath, nil
}

// walkRepositoryStore walks the repository store at reposDir,
Expand Down
2 changes: 1 addition & 1 deletion internal/code/git.go
Expand Up @@ -296,7 +296,7 @@ func (h *gitHandler) serveGitReceivePack(w http.ResponseWriter, req *http.Reques
log.Println(err)
}

added, _, err := h.code.Rediscover()
added, _, err := h.code.Rediscover(repo.Spec)
if err != nil {
log.Println("h.code.Rediscover:", err)
}
Expand Down
130 changes: 130 additions & 0 deletions internal/code/replacedirs_test.go
@@ -0,0 +1,130 @@
package code

import (
"reflect"
"testing"
)

func TestReplaceDirs(t *testing.T) {
tests := []struct {
s []*Directory
repoRoot string
dirs []*Directory
want []*Directory
}{
// 2 -> 1 (shrink by 1).
{
s: []*Directory{
{RepoRoot: "a", ImportPath: "a"},
{RepoRoot: "b", ImportPath: "b/1"},
{RepoRoot: "b", ImportPath: "b/2"},
{RepoRoot: "c", ImportPath: "c"},
},
repoRoot: "b",
dirs: []*Directory{
{RepoRoot: "b", ImportPath: "b/3"},
},
want: []*Directory{
{RepoRoot: "a", ImportPath: "a"},
{RepoRoot: "b", ImportPath: "b/3"},
{RepoRoot: "c", ImportPath: "c"},
},
},
// 2 -> 2 (stay same size).
{
s: []*Directory{
{RepoRoot: "a", ImportPath: "a"},
{RepoRoot: "b", ImportPath: "b/1"},
{RepoRoot: "b", ImportPath: "b/2"},
{RepoRoot: "c", ImportPath: "c"},
},
repoRoot: "b",
dirs: []*Directory{
{RepoRoot: "b", ImportPath: "b/3"},
{RepoRoot: "b", ImportPath: "b/4"},
},
want: []*Directory{
{RepoRoot: "a", ImportPath: "a"},
{RepoRoot: "b", ImportPath: "b/3"},
{RepoRoot: "b", ImportPath: "b/4"},
{RepoRoot: "c", ImportPath: "c"},
},
},
// 2 -> 3 (grow by 1).
{
s: []*Directory{
{RepoRoot: "a", ImportPath: "a"},
{RepoRoot: "b", ImportPath: "b/1"},
{RepoRoot: "b", ImportPath: "b/2"},
{RepoRoot: "c", ImportPath: "c"},
},
repoRoot: "b",
dirs: []*Directory{
{RepoRoot: "b", ImportPath: "b/3"},
{RepoRoot: "b", ImportPath: "b/4"},
{RepoRoot: "b", ImportPath: "b/5"},
},
want: []*Directory{
{RepoRoot: "a", ImportPath: "a"},
{RepoRoot: "b", ImportPath: "b/3"},
{RepoRoot: "b", ImportPath: "b/4"},
{RepoRoot: "b", ImportPath: "b/5"},
{RepoRoot: "c", ImportPath: "c"},
},
},

// 2 -> 0.
{
s: []*Directory{
{RepoRoot: "a", ImportPath: "a"},
{RepoRoot: "b", ImportPath: "b/1"},
{RepoRoot: "b", ImportPath: "b/2"},
{RepoRoot: "c", ImportPath: "c"},
},
repoRoot: "b",
dirs: nil,
want: []*Directory{
{RepoRoot: "a", ImportPath: "a"},
{RepoRoot: "c", ImportPath: "c"},
},
},
// 0 -> 2.
{
s: []*Directory{
{RepoRoot: "a", ImportPath: "a"},
{RepoRoot: "c", ImportPath: "c"},
},
repoRoot: "b",
dirs: []*Directory{
{RepoRoot: "b", ImportPath: "b/1"},
{RepoRoot: "b", ImportPath: "b/2"},
},
want: []*Directory{
{RepoRoot: "a", ImportPath: "a"},
{RepoRoot: "b", ImportPath: "b/1"},
{RepoRoot: "b", ImportPath: "b/2"},
{RepoRoot: "c", ImportPath: "c"},
},
},
// 0 -> 0.
{
s: []*Directory{
{RepoRoot: "a", ImportPath: "a"},
{RepoRoot: "c", ImportPath: "c"},
},
repoRoot: "b",
dirs: nil,
want: []*Directory{
{RepoRoot: "a", ImportPath: "a"},
{RepoRoot: "c", ImportPath: "c"},
},
},
}
for _, tc := range tests {
got := tc.s
replaceDirs(&got, tc.repoRoot, tc.dirs)
if !reflect.DeepEqual(got, tc.want) {
t.Error("not equal")
}
}
}

0 comments on commit df88f51

Please sign in to comment.