Permalink
Browse files

internal/code: Rediscover only within the affected repository.

This change is an optimization for code.Service.Rediscover method.
Instead of always rediscovering the entire store, limit scope to
just the affected repository.

Time to Rediscover with sample test data went from 13 ms to 6 ms.

This optimization will have a significant impact when the entire
repository store is very large compared to the single repository
being updated.

Increase test coverage, since some of the logic is non-trivial.
  • Loading branch information...
dmitshur committed Oct 19, 2018
1 parent 5317c0d commit de4db26b5c602633e16e2a1507b12a168b414311
Showing with 227 additions and 21 deletions.
  1. +68 −16 internal/code/code.go
  2. +6 −4 internal/code/discover.go
  3. +1 −1 internal/code/git.go
  4. +152 −0 internal/code/replacedirs_test.go
View
@@ -126,45 +126,97 @@ func (s *Service) CreateRepo(ctx context.Context, repoSpec, description string)
func insertDir(s *[]*Directory, dir *Directory) {
// Use binary search to find index where dir should be inserted,
// and insert it directly there.
i := sort.Search(len(*s), func(i int) bool { return (*s)[i].ImportPath >= dir.ImportPath })
i := sort.Search(len(*s), func(i int) bool { return (*s)[i].RepoRoot >= dir.RepoRoot })
*s = append(*s, nil)
copy((*s)[i+1:], (*s)[i:])
(*s)[i] = dir
}
// Rediscover rediscovers all code in the repository store.
// Rediscover rediscovers code in repoRoot of the repository store.
// It returns packages that have been added and removed.
func (s *Service) Rediscover() (added, removed []*Directory, err error) {
// TODO: Can optimize this by rediscovering selectively (only the affected repo and its parent dirs).
dirs, byImportPath, err := discover(s.reposDir)
func (s *Service) Rediscover(repoRoot string) (added, removed []*Directory, err error) {
gitDir := filepath.Join(s.reposDir, filepath.FromSlash(repoRoot))
newDirs, err := walkRepository(gitDir, repoRoot)
if err != nil {
return nil, nil, err
}
s.mu.Lock()
oldDirs := s.dirs
oldByImportPath := s.byImportPath
s.dirs = dirs
s.byImportPath = byImportPath
oldDirs := replaceDirs(&s.dirs, repoRoot, newDirs)
replaceDirsMap(s.byImportPath, oldDirs, newDirs)
populateLicenseRoot(newDirs, s.byImportPath)
s.mu.Unlock()
// Compute added, removed packages.
for _, d := range dirs {
if d.Package != nil && !dirExistsAndHasPackage(oldByImportPath[d.ImportPath]) {
added = append(added, d)
for _, d := range newDirs {
if d.Package == nil || containsPackage(oldDirs, d.ImportPath) {
continue
}
added = append(added, d)
}
for _, d := range oldDirs {
if d.Package != nil && !dirExistsAndHasPackage(byImportPath[d.ImportPath]) {
removed = append(removed, d)
if d.Package == nil || containsPackage(newDirs, d.ImportPath) {
continue
}
removed = append(removed, d)
}
return added, removed, nil
}
// dirExistsAndHasPackage reports whether dir exists and contains a Go package.
func dirExistsAndHasPackage(dir *Directory) bool { return dir != nil && dir.Package != nil }
// replaceDirs replaces directories with repoRoot in the sorted slice s
// with newDirs, keeping the slice sorted. It returns old directories that got replaced.
func replaceDirs(s *[]*Directory, repoRoot string, newDirs []*Directory) (oldDirs []*Directory) {
// Use binary search to find index where directories should be replaced,
// and replace them directly there.
// i is the start index of old directories, and j is the end index.
i := sort.Search(len(*s), func(i int) bool { return (*s)[i].RepoRoot >= repoRoot })
old := 0 // Number of old directories to replace.
for i+old < len(*s) && (*s)[i+old].RepoRoot == repoRoot {
old++
}
j := i + old
// Make a copy of old directories before they're overwritten.
oldDirs = make([]*Directory, old)
copy(oldDirs, (*s)[i:j])
// Grow/shrink the slice by delta, and copy new directories into place.
switch delta := len(newDirs) - len(oldDirs); {
case delta > 0:
// Grow s by delta.
*s = append(*s, make([]*Directory, delta)...)
copy((*s)[j+delta:], (*s)[j:])
case delta < 0:
// Shrink s by delta.
copy((*s)[j+delta:], (*s)[j:])
copy((*s)[len(*s)+delta:], make([]*Directory, -delta))
*s = (*s)[:len(*s)+delta]
}
copy((*s)[i:], newDirs)
return oldDirs
}
func replaceDirsMap(m map[string]*Directory, oldDirs, newDirs []*Directory) {
for _, d := range oldDirs {
delete(m, d.ImportPath)
}
for _, d := range newDirs {
m[d.ImportPath] = d
}
}
// containsPackage reports whether dirs contains a Go package with matching importPath.
func containsPackage(dirs []*Directory, importPath string) bool {
for _, d := range dirs {
if d.ImportPath != importPath {
continue
}
return d.Package != nil
}
return false
}
// Directory represents a directory inside a repository store.
type Directory struct {
View
@@ -32,9 +32,13 @@ func discover(reposDir string) ([]*Directory, map[string]*Directory, error) {
for _, d := range dirs {
byImportPath[d.ImportPath] = d
}
populateLicenseRoot(dirs, byImportPath)
return dirs, byImportPath, nil
}
// Populate LicenseRoot values for remaining directories
// that don't directly contain a LICENSE file.
// populateLicenseRoot populates LicenseRoot values for
// directories that don't directly contain a LICENSE file.
func populateLicenseRoot(dirs []*Directory, byImportPath map[string]*Directory) {
for _, dir := range dirs {
if dir.HasLicenseFile() {
continue
@@ -48,8 +52,6 @@ func discover(reposDir string) ([]*Directory, map[string]*Directory, error) {
}
}
}
return dirs, byImportPath, nil
}
// walkRepositoryStore walks the repository store at reposDir,
View
@@ -296,7 +296,7 @@ func (h *gitHandler) serveGitReceivePack(w http.ResponseWriter, req *http.Reques
log.Println(err)
}
added, _, err := h.code.Rediscover()
added, _, err := h.code.Rediscover(repo.Spec)
if err != nil {
log.Println("h.code.Rediscover:", err)
}
@@ -0,0 +1,152 @@
package code
import (
"reflect"
"testing"
)
func TestReplaceDirs(t *testing.T) {
tests := []struct {
s []*Directory
repoRoot string
dirs []*Directory
want []*Directory
wantOldDirs []*Directory
}{
// 2 -> 1 (shrink by 1).
{
s: []*Directory{
{RepoRoot: "a", ImportPath: "a"},
{RepoRoot: "b", ImportPath: "b/1"},
{RepoRoot: "b", ImportPath: "b/2"},
{RepoRoot: "c", ImportPath: "c"},
},
repoRoot: "b",
dirs: []*Directory{
{RepoRoot: "b", ImportPath: "b/3"},
},
want: []*Directory{
{RepoRoot: "a", ImportPath: "a"},
{RepoRoot: "b", ImportPath: "b/3"},
{RepoRoot: "c", ImportPath: "c"},
},
wantOldDirs: []*Directory{
{RepoRoot: "b", ImportPath: "b/1"},
{RepoRoot: "b", ImportPath: "b/2"},
},
},
// 2 -> 2 (stay same size).
{
s: []*Directory{
{RepoRoot: "a", ImportPath: "a"},
{RepoRoot: "b", ImportPath: "b/1"},
{RepoRoot: "b", ImportPath: "b/2"},
{RepoRoot: "c", ImportPath: "c"},
},
repoRoot: "b",
dirs: []*Directory{
{RepoRoot: "b", ImportPath: "b/3"},
{RepoRoot: "b", ImportPath: "b/4"},
},
want: []*Directory{
{RepoRoot: "a", ImportPath: "a"},
{RepoRoot: "b", ImportPath: "b/3"},
{RepoRoot: "b", ImportPath: "b/4"},
{RepoRoot: "c", ImportPath: "c"},
},
wantOldDirs: []*Directory{
{RepoRoot: "b", ImportPath: "b/1"},
{RepoRoot: "b", ImportPath: "b/2"},
},
},
// 2 -> 3 (grow by 1).
{
s: []*Directory{
{RepoRoot: "a", ImportPath: "a"},
{RepoRoot: "b", ImportPath: "b/1"},
{RepoRoot: "b", ImportPath: "b/2"},
{RepoRoot: "c", ImportPath: "c"},
},
repoRoot: "b",
dirs: []*Directory{
{RepoRoot: "b", ImportPath: "b/3"},
{RepoRoot: "b", ImportPath: "b/4"},
{RepoRoot: "b", ImportPath: "b/5"},
},
want: []*Directory{
{RepoRoot: "a", ImportPath: "a"},
{RepoRoot: "b", ImportPath: "b/3"},
{RepoRoot: "b", ImportPath: "b/4"},
{RepoRoot: "b", ImportPath: "b/5"},
{RepoRoot: "c", ImportPath: "c"},
},
wantOldDirs: []*Directory{
{RepoRoot: "b", ImportPath: "b/1"},
{RepoRoot: "b", ImportPath: "b/2"},
},
},
// 2 -> 0.
{
s: []*Directory{
{RepoRoot: "a", ImportPath: "a"},
{RepoRoot: "b", ImportPath: "b/1"},
{RepoRoot: "b", ImportPath: "b/2"},
{RepoRoot: "c", ImportPath: "c"},
},
repoRoot: "b",
dirs: nil,
want: []*Directory{
{RepoRoot: "a", ImportPath: "a"},
{RepoRoot: "c", ImportPath: "c"},
},
wantOldDirs: []*Directory{
{RepoRoot: "b", ImportPath: "b/1"},
{RepoRoot: "b", ImportPath: "b/2"},
},
},
// 0 -> 2.
{
s: []*Directory{
{RepoRoot: "a", ImportPath: "a"},
{RepoRoot: "c", ImportPath: "c"},
},
repoRoot: "b",
dirs: []*Directory{
{RepoRoot: "b", ImportPath: "b/1"},
{RepoRoot: "b", ImportPath: "b/2"},
},
want: []*Directory{
{RepoRoot: "a", ImportPath: "a"},
{RepoRoot: "b", ImportPath: "b/1"},
{RepoRoot: "b", ImportPath: "b/2"},
{RepoRoot: "c", ImportPath: "c"},
},
wantOldDirs: []*Directory{},
},
// 0 -> 0.
{
s: []*Directory{
{RepoRoot: "a", ImportPath: "a"},
{RepoRoot: "c", ImportPath: "c"},
},
repoRoot: "b",
dirs: nil,
want: []*Directory{
{RepoRoot: "a", ImportPath: "a"},
{RepoRoot: "c", ImportPath: "c"},
},
wantOldDirs: []*Directory{},
},
}
for _, tc := range tests {
got := tc.s
gotOldDirs := replaceDirs(&got, tc.repoRoot, tc.dirs)
if !reflect.DeepEqual(got, tc.want) {
t.Error("not equal")
}
if !reflect.DeepEqual(gotOldDirs, tc.wantOldDirs) {
t.Error("oldDirs directories not equal")
}
}
}

0 comments on commit de4db26

Please sign in to comment.