From df88f51e825e3c914f2fd723ef77116bc6825881 Mon Sep 17 00:00:00 2001 From: Dmitri Shuralyov Date: Fri, 19 Oct 2018 02:09:50 -0400 Subject: [PATCH] internal/code: Rediscover only within the affected repository. This change is an optimization for code.Service.Rediscover method. Instead of always rediscovering the entire store, limit scope to just the affected repository. Time to Rediscover with sample test data went from 13 ms to 6 ms. This optimization will have a significant impact when the entire repository store is very large compared to the single repository being updated. --- internal/code/code.go | 84 +++++++++++++++---- internal/code/discover.go | 10 ++- internal/code/git.go | 2 +- internal/code/replacedirs_test.go | 130 ++++++++++++++++++++++++++++++ 4 files changed, 205 insertions(+), 21 deletions(-) create mode 100644 internal/code/replacedirs_test.go diff --git a/internal/code/code.go b/internal/code/code.go index d3ff628..c082306 100644 --- a/internal/code/code.go +++ b/internal/code/code.go @@ -126,45 +126,97 @@ func (s *Service) CreateRepo(ctx context.Context, repoSpec, description string) func insertDir(s *[]*Directory, dir *Directory) { // Use binary search to find index where dir should be inserted, // and insert it directly there. - i := sort.Search(len(*s), func(i int) bool { return (*s)[i].ImportPath >= dir.ImportPath }) + i := sort.Search(len(*s), func(i int) bool { return (*s)[i].RepoRoot >= dir.RepoRoot }) *s = append(*s, nil) copy((*s)[i+1:], (*s)[i:]) (*s)[i] = dir } -// Rediscover rediscovers all code in the repository store. +// Rediscover rediscovers code in repoRoot of the repository store. // It returns packages that have been added and removed. -func (s *Service) Rediscover() (added, removed []*Directory, err error) { - // TODO: Can optimize this by rediscovering selectively (only the affected repo and its parent dirs). - dirs, byImportPath, err := discover(s.reposDir) +func (s *Service) Rediscover(repoRoot string) (added, removed []*Directory, err error) { + gitDir := filepath.Join(s.reposDir, filepath.FromSlash(repoRoot)) + newDirs, err := walkRepository(gitDir, repoRoot) if err != nil { return nil, nil, err } s.mu.Lock() - oldDirs := s.dirs - oldByImportPath := s.byImportPath - s.dirs = dirs - s.byImportPath = byImportPath + oldDirs := replaceDirs(&s.dirs, repoRoot, newDirs) + replaceDirsMap(s.byImportPath, oldDirs, newDirs) + populateLicenseRoot(newDirs, s.byImportPath) s.mu.Unlock() // Compute added, removed packages. - for _, d := range dirs { - if d.Package != nil && !dirExistsAndHasPackage(oldByImportPath[d.ImportPath]) { - added = append(added, d) + for _, d := range newDirs { + if d.Package == nil || containsPackage(oldDirs, d.ImportPath) { + continue } + added = append(added, d) } for _, d := range oldDirs { - if d.Package != nil && !dirExistsAndHasPackage(byImportPath[d.ImportPath]) { - removed = append(removed, d) + if d.Package == nil || containsPackage(newDirs, d.ImportPath) { + continue } + removed = append(removed, d) } return added, removed, nil } -// dirExistsAndHasPackage reports whether dir exists and contains a Go package. -func dirExistsAndHasPackage(dir *Directory) bool { return dir != nil && dir.Package != nil } +// replaceDirs replaces directories with repoRoot in the sorted slice s +// with newDirs, keeping the slice sorted. It returns old directories that got replaced. +func replaceDirs(s *[]*Directory, repoRoot string, newDirs []*Directory) (oldDirs []*Directory) { + // Use binary search to find index where directories should be replaced, + // and replace them directly there. + // i is the start index of old directories, and j is the end index. + i := sort.Search(len(*s), func(i int) bool { return (*s)[i].RepoRoot >= repoRoot }) + old := 0 // Number of old directories to replace. + for i+old < len(*s) && (*s)[i+old].RepoRoot == repoRoot { + old++ + } + j := i + old + + // Make a copy of old directories before they're overwritten. + oldDirs = make([]*Directory, old) + copy(oldDirs, (*s)[i:j]) + + // Grow/shrink the slice by delta, and copy new directories into place. + switch delta := len(newDirs) - len(oldDirs); { + case delta > 0: + // Grow s by delta. + *s = append(*s, make([]*Directory, delta)...) + copy((*s)[j+delta:], (*s)[j:]) + case delta < 0: + // Shrink s by delta. + copy((*s)[j+delta:], (*s)[j:]) + copy((*s)[len(*s)+delta:], make([]*Directory, -delta)) + *s = (*s)[:len(*s)+delta] + } + copy((*s)[i:], newDirs) + + return oldDirs +} + +func replaceDirsMap(m map[string]*Directory, oldDirs, newDirs []*Directory) { + for _, d := range oldDirs { + delete(m, d.ImportPath) + } + for _, d := range newDirs { + m[d.ImportPath] = d + } +} + +// containsPackage reports whether dirs contains a Go package with matching importPath. +func containsPackage(dirs []*Directory, importPath string) bool { + for _, d := range dirs { + if d.ImportPath != importPath { + continue + } + return d.Package != nil + } + return false +} // Directory represents a directory inside a repository store. type Directory struct { diff --git a/internal/code/discover.go b/internal/code/discover.go index 6d99f8a..fc86c1f 100644 --- a/internal/code/discover.go +++ b/internal/code/discover.go @@ -32,9 +32,13 @@ func discover(reposDir string) ([]*Directory, map[string]*Directory, error) { for _, d := range dirs { byImportPath[d.ImportPath] = d } + populateLicenseRoot(dirs, byImportPath) + return dirs, byImportPath, nil +} - // Populate LicenseRoot values for remaining directories - // that don't directly contain a LICENSE file. +// populateLicenseRoot populates LicenseRoot values for +// directories that don't directly contain a LICENSE file. +func populateLicenseRoot(dirs []*Directory, byImportPath map[string]*Directory) { for _, dir := range dirs { if dir.HasLicenseFile() { continue @@ -48,8 +52,6 @@ func discover(reposDir string) ([]*Directory, map[string]*Directory, error) { } } } - - return dirs, byImportPath, nil } // walkRepositoryStore walks the repository store at reposDir, diff --git a/internal/code/git.go b/internal/code/git.go index ca1d88a..cbf32a4 100644 --- a/internal/code/git.go +++ b/internal/code/git.go @@ -296,7 +296,7 @@ func (h *gitHandler) serveGitReceivePack(w http.ResponseWriter, req *http.Reques log.Println(err) } - added, _, err := h.code.Rediscover() + added, _, err := h.code.Rediscover(repo.Spec) if err != nil { log.Println("h.code.Rediscover:", err) } diff --git a/internal/code/replacedirs_test.go b/internal/code/replacedirs_test.go new file mode 100644 index 0000000..1c73559 --- /dev/null +++ b/internal/code/replacedirs_test.go @@ -0,0 +1,130 @@ +package code + +import ( + "reflect" + "testing" +) + +func TestReplaceDirs(t *testing.T) { + tests := []struct { + s []*Directory + repoRoot string + dirs []*Directory + want []*Directory + }{ + // 2 -> 1 (shrink by 1). + { + s: []*Directory{ + {RepoRoot: "a", ImportPath: "a"}, + {RepoRoot: "b", ImportPath: "b/1"}, + {RepoRoot: "b", ImportPath: "b/2"}, + {RepoRoot: "c", ImportPath: "c"}, + }, + repoRoot: "b", + dirs: []*Directory{ + {RepoRoot: "b", ImportPath: "b/3"}, + }, + want: []*Directory{ + {RepoRoot: "a", ImportPath: "a"}, + {RepoRoot: "b", ImportPath: "b/3"}, + {RepoRoot: "c", ImportPath: "c"}, + }, + }, + // 2 -> 2 (stay same size). + { + s: []*Directory{ + {RepoRoot: "a", ImportPath: "a"}, + {RepoRoot: "b", ImportPath: "b/1"}, + {RepoRoot: "b", ImportPath: "b/2"}, + {RepoRoot: "c", ImportPath: "c"}, + }, + repoRoot: "b", + dirs: []*Directory{ + {RepoRoot: "b", ImportPath: "b/3"}, + {RepoRoot: "b", ImportPath: "b/4"}, + }, + want: []*Directory{ + {RepoRoot: "a", ImportPath: "a"}, + {RepoRoot: "b", ImportPath: "b/3"}, + {RepoRoot: "b", ImportPath: "b/4"}, + {RepoRoot: "c", ImportPath: "c"}, + }, + }, + // 2 -> 3 (grow by 1). + { + s: []*Directory{ + {RepoRoot: "a", ImportPath: "a"}, + {RepoRoot: "b", ImportPath: "b/1"}, + {RepoRoot: "b", ImportPath: "b/2"}, + {RepoRoot: "c", ImportPath: "c"}, + }, + repoRoot: "b", + dirs: []*Directory{ + {RepoRoot: "b", ImportPath: "b/3"}, + {RepoRoot: "b", ImportPath: "b/4"}, + {RepoRoot: "b", ImportPath: "b/5"}, + }, + want: []*Directory{ + {RepoRoot: "a", ImportPath: "a"}, + {RepoRoot: "b", ImportPath: "b/3"}, + {RepoRoot: "b", ImportPath: "b/4"}, + {RepoRoot: "b", ImportPath: "b/5"}, + {RepoRoot: "c", ImportPath: "c"}, + }, + }, + + // 2 -> 0. + { + s: []*Directory{ + {RepoRoot: "a", ImportPath: "a"}, + {RepoRoot: "b", ImportPath: "b/1"}, + {RepoRoot: "b", ImportPath: "b/2"}, + {RepoRoot: "c", ImportPath: "c"}, + }, + repoRoot: "b", + dirs: nil, + want: []*Directory{ + {RepoRoot: "a", ImportPath: "a"}, + {RepoRoot: "c", ImportPath: "c"}, + }, + }, + // 0 -> 2. + { + s: []*Directory{ + {RepoRoot: "a", ImportPath: "a"}, + {RepoRoot: "c", ImportPath: "c"}, + }, + repoRoot: "b", + dirs: []*Directory{ + {RepoRoot: "b", ImportPath: "b/1"}, + {RepoRoot: "b", ImportPath: "b/2"}, + }, + want: []*Directory{ + {RepoRoot: "a", ImportPath: "a"}, + {RepoRoot: "b", ImportPath: "b/1"}, + {RepoRoot: "b", ImportPath: "b/2"}, + {RepoRoot: "c", ImportPath: "c"}, + }, + }, + // 0 -> 0. + { + s: []*Directory{ + {RepoRoot: "a", ImportPath: "a"}, + {RepoRoot: "c", ImportPath: "c"}, + }, + repoRoot: "b", + dirs: nil, + want: []*Directory{ + {RepoRoot: "a", ImportPath: "a"}, + {RepoRoot: "c", ImportPath: "c"}, + }, + }, + } + for _, tc := range tests { + got := tc.s + replaceDirs(&got, tc.repoRoot, tc.dirs) + if !reflect.DeepEqual(got, tc.want) { + t.Error("not equal") + } + } +}