diff --git a/CHANGELOG.md b/CHANGELOG.md index 539eadf72..479c8976b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Added `commit_file_stats` function. - Added documentation about `commit_stats`. ### Changed @@ -17,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - internal/function: take into account if repository is resolved in commit_stats ([#863](https://github.com/src-d/gitbase/pull/863)) +- internal/function: `Files` field in `commit_stats` contains now proper results. ### Changed diff --git a/docs/using-gitbase/functions.md b/docs/using-gitbase/functions.md index 6cef3d146..7fb25d517 100644 --- a/docs/using-gitbase/functions.md +++ b/docs/using-gitbase/functions.md @@ -6,7 +6,8 @@ To make some common tasks easier for the user, there are some functions to inter | Name | Description | |:-------------|:-------------------------------------------------------------------------------------------------------------------------------| -|`commit_stats(repository_id, [from_commit_hash], to_commit_hash)`|returns the stats between two commits for a repository. If from is empty, it will compare the given `to_commit_hash` with its parent commit| +|`commit_stats(repository_id, [from_commit_hash], to_commit_hash)`|returns the stats between two commits for a repository. If from is empty, it will compare the given `to_commit_hash` with its parent commit. Vendored files stats are not included in the result of this function.| +|`commit_file_stats(repository_id, [from_commit_hash], to_commit_hash)`|returns an array with the stats of each file in `to_commit_hash` since the given `from_commit_hash`. If from is not given, the parent commit will be used. 
Vendored files stats are not included in the result of this function.| |`is_remote(reference_name)bool`| check if the given reference name is from a remote one | |`is_tag(reference_name)bool`| check if the given reference name is a tag | |`is_vendor(file_path)bool`| check if the given file name is a vendored file | diff --git a/internal/commitstats/commit.go b/internal/commitstats/commit.go new file mode 100644 index 000000000..f8b1fb1b7 --- /dev/null +++ b/internal/commitstats/commit.go @@ -0,0 +1,60 @@ +package commitstats + +import ( + "fmt" + + "gopkg.in/src-d/go-git.v4" + "gopkg.in/src-d/go-git.v4/plumbing/object" +) + +// CommitStats represents the stats for a commit. +type CommitStats struct { + // Files is the number of files added/modified/removed by this commit. + Files int + // Code stats of the code lines. + Code KindStats + // Comment stats of the comment lines. + Comment KindStats + // Blank stats of the blank lines. + Blank KindStats + // Other stats of the lines of files whose language is not recognized. + Other KindStats + // Total is the sum of the previous stats. + Total KindStats +} + +func (s *CommitStats) String() string { + return fmt.Sprintf("Code (+%d/-%d)\nComment (+%d/-%d)\nBlank (+%d/-%d)\nOther (+%d/-%d)\nTotal (+%d/-%d)\nFiles (%d)\n", + s.Code.Additions, s.Code.Deletions, + s.Comment.Additions, s.Comment.Deletions, + s.Blank.Additions, s.Blank.Deletions, + s.Other.Additions, s.Other.Deletions, + s.Total.Additions, s.Total.Deletions, + s.Files, + ) +} + +// Calculate calculates the CommitStats from one commit to another. +// If from is nil the first parent is used; if the commit is an orphan the stats +// are compared against an empty commit. 
+func Calculate(r *git.Repository, from, to *object.Commit) (*CommitStats, error) { + fs, err := CalculateByFile(r, from, to) + if err != nil { + return nil, err + } + + return commitStatsFromCommitFileStats(fs), nil +} + +func commitStatsFromCommitFileStats(fs []CommitFileStats) *CommitStats { + var s CommitStats + for _, f := range fs { + s.Blank.Add(f.Blank) + s.Comment.Add(f.Comment) + s.Code.Add(f.Code) + s.Other.Add(f.Other) + s.Total.Add(f.Total) + s.Files++ + } + return &s +} diff --git a/internal/commitstats/commit_stats.go b/internal/commitstats/commit_stats.go deleted file mode 100644 index 08d05f04f..000000000 --- a/internal/commitstats/commit_stats.go +++ /dev/null @@ -1,368 +0,0 @@ -package commitstats - -import ( - "bufio" - "bytes" - "fmt" - "io" - - "github.com/hhatto/gocloc" - "gopkg.in/src-d/enry.v1" - "gopkg.in/src-d/go-git.v4" - "gopkg.in/src-d/go-git.v4/plumbing/object" - "gopkg.in/src-d/go-git.v4/utils/binary" - "gopkg.in/src-d/go-git.v4/utils/ioutil" - "gopkg.in/src-d/go-git.v4/utils/merkletrie" -) - -// LineKind defines the kind of a line in a file. -type LineKind int - -const ( - // Code represents a line of code. - Code LineKind = iota + 1 - // Comment represents a line of comment. - Comment - // Blank represents an empty line. - Blank - // Other represents a line from any other kind. - Other -) - -// Calculate calculates the CommitStats for from commit to another. -// if from is nil the first parent is used, if the commit is orphan the stats -// are compared against a empty commit. 
-func Calculate(r *git.Repository, from, to *object.Commit) (*CommitStats, error) { - cc := &commitStatsCalculator{} - - var err error - if to.NumParents() != 0 && from == nil { - from, err = to.Parent(0) - if err != nil { - return nil, err - } - } - - if from == nil { - return cc.doCommit(to) - } - - return cc.doDiff(r, from, to) -} - -type commitStatsCalculator struct{} - -func (cc *commitStatsCalculator) doCommit(c *object.Commit) (*CommitStats, error) { - files, err := c.Files() - if err != nil { - return nil, err - } - - stats := &CommitStats{} - return stats, files.ForEach(func(f *object.File) error { - fi, err := cc.doBlob(&f.Blob, f.Name) - if err != nil { - return err - } - - stats.Add(fi.stats()) - stats.Files++ - return nil - }) -} - -func (cc *commitStatsCalculator) doDiff(r *git.Repository, from, to *object.Commit) (*CommitStats, error) { - ch, err := cc.computeDiff(from, to) - if err != nil { - return nil, err - } - - stats := &CommitStats{} - for _, change := range ch { - s, err := cc.doChange(r, change) - if err != nil { - return nil, err - } - - stats.Add(s) - stats.Files++ - - } - - return stats, nil -} - -func (cc *commitStatsCalculator) computeDiff(from, to *object.Commit) (object.Changes, error) { - src, err := to.Tree() - if err != nil { - return nil, err - } - - dst, err := from.Tree() - if err != nil { - return nil, err - } - - return object.DiffTree(dst, src) -} - -func (cc *commitStatsCalculator) doChange(r *git.Repository, ch *object.Change) (*CommitStats, error) { - a, err := ch.Action() - if err != nil { - return nil, err - } - - var fi fileStats - - switch a { - case merkletrie.Delete: - fi, err = cc.doChangeEntry(r, &ch.From) - if err != nil { - return nil, err - } - case merkletrie.Insert: - fi, err = cc.doChangeEntry(r, &ch.To) - if err != nil { - return nil, err - } - case merkletrie.Modify: - src, err := cc.doChangeEntry(r, &ch.From) - if err != nil { - return nil, err - } - - dst, err := cc.doChangeEntry(r, &ch.To) - if err != 
nil { - return nil, err - } - - if src == nil { - src = make(fileStats) - } - - if dst == nil { - dst = make(fileStats) - } - - dst.sub(src) - fi = dst - } - - return fi.stats(), nil -} - -func (cc *commitStatsCalculator) doChangeEntry(r *git.Repository, ch *object.ChangeEntry) (fileStats, error) { - blob, err := r.BlobObject(ch.TreeEntry.Hash) - if err != nil { - return nil, err - } - - return cc.doBlob(blob, ch.Name) -} - -func (cc *commitStatsCalculator) doBlob(blob *object.Blob, filename string) (fileStats, error) { - if enry.IsVendor(filename) { - return nil, nil - } - - isBinary, err := isBinary(blob) - if err != nil { - return nil, err - } - - if isBinary { - return nil, nil - } - - lang := cc.getLanguage(filename) - - return newFileStats(blob, lang) -} - -func (*commitStatsCalculator) getLanguage(filename string) string { - if lang, ok := enry.GetLanguageByFilename(filename); ok { - return lang - } - - if lang, ok := enry.GetLanguageByExtension(filename); ok { - return lang - } - - return "" -} - -// KindStats represents the stats for a kind of lines in a file. -type KindStats struct { - // Additions number of lines added. - Additions int - // Deletions number of lines deleted. - Deletions int -} - -// Add adds the given stats to this stats. -func (k *KindStats) Add(add KindStats) { - k.Additions += add.Additions - k.Deletions += add.Deletions -} - -// CommitStats represents the stats for a commit. -type CommitStats struct { - // Files add/modified/removed by this commit. - Files int - // Code stats of the code lines. - Code KindStats - // Comment stats of the comment lines. - Comment KindStats - // Blank stats of the blank lines. - Blank KindStats - // Other stats of files that are not from a recognized or format language. - Other KindStats - // Total the sum of the previous stats. - Total KindStats -} - -// Add adds the given stats to this stats. 
-func (s *CommitStats) Add(stats *CommitStats) { - s.Code.Add(stats.Code) - s.Comment.Add(stats.Comment) - s.Blank.Add(stats.Blank) - s.Other.Add(stats.Other) - s.Total.Add(stats.Total) -} - -func (s *CommitStats) String() string { - return fmt.Sprintf("Code (+%d/-%d)\nComment (+%d/-%d)\nBlank (+%d/-%d)\nOther (+%d/-%d)\nTotal (+%d/-%d)\nFiles (%d)\n", - s.Code.Additions, s.Code.Deletions, - s.Comment.Additions, s.Comment.Deletions, - s.Blank.Additions, s.Blank.Deletions, - s.Other.Additions, s.Other.Deletions, - s.Total.Additions, s.Total.Deletions, - s.Files, - ) -} - -var languages = gocloc.NewDefinedLanguages() - -type fileStats map[string]*LineInfo - -// LineInfo represents the information about a sigle line. -type LineInfo struct { - Kind LineKind - Count int -} - -func newFileStats(f *object.Blob, lang string) (fileStats, error) { - ff := make(fileStats, 50) - - r, err := f.Reader() - if err != nil { - return ff, err - } - - defer ioutil.CheckClose(r, &err) - - l, ok := languages.Langs[lang] - if ok { - doNewFileStatsGoCloc(r, l, &ff) - return ff, nil - } - - return ff, doNewFileStatsPlain(r, &ff) -} - -func doNewFileStatsGoCloc(r io.Reader, l *gocloc.Language, ff *fileStats) { - gocloc.AnalyzeReader("", l, r, &gocloc.ClocOptions{ - OnBlank: ff.addBlank, - OnCode: ff.addCode, - OnComment: ff.addComment, - }) -} - -func doNewFileStatsPlain(r io.Reader, ff *fileStats) error { - s := bufio.NewScanner(r) - for s.Scan() { - ff.addOther(s.Text()) - - } - - return s.Err() -} - -func (fi fileStats) addCode(line string) { fi.add(line, Code) } -func (fi fileStats) addComment(line string) { fi.add(line, Comment) } -func (fi fileStats) addBlank(line string) { fi.add(line, Blank) } -func (fi fileStats) addOther(line string) { fi.add(line, Other) } -func (fi fileStats) add(line string, k LineKind) { - if fi[line] == nil { - fi[line] = &LineInfo{} - } - - fi[line].Count++ - fi[line].Kind = k -} - -func (fi fileStats) sub(to fileStats) { - for line, i := range to { - if _, 
ok := fi[line]; ok { - fi[line].Count -= i.Count - } else { - fi[line] = i - fi[line].Count *= -1 - } - } -} - -func (fi fileStats) stats() *CommitStats { - stats := &CommitStats{} - for _, info := range fi { - fillKindStats(&stats.Total, info) - switch info.Kind { - case Code: - fillKindStats(&stats.Code, info) - case Comment: - fillKindStats(&stats.Comment, info) - case Blank: - fillKindStats(&stats.Blank, info) - case Other: - fillKindStats(&stats.Other, info) - } - } - - return stats -} - -func fillKindStats(ks *KindStats, info *LineInfo) { - if info.Count > 0 { - ks.Additions += info.Count - } - if info.Count < 0 { - ks.Deletions += (info.Count * -1) - } -} - -func (fi fileStats) String() string { - buf := bytes.NewBuffer(nil) - for line, i := range fi { - sign := ' ' - switch { - case i.Count > 0: - sign = '+' - case i.Count < 0: - sign = '-' - } - - fmt.Fprintf(buf, "%c [%3dx] %s\n", sign, i.Count, line) - } - - return buf.String() -} - -func isBinary(b *object.Blob) (bin bool, err error) { - reader, err := b.Reader() - if err != nil { - return false, err - } - - defer ioutil.CheckClose(reader, &err) - return binary.IsBinary(reader) -} diff --git a/internal/commitstats/commit_stats_test.go b/internal/commitstats/commit_test.go similarity index 79% rename from internal/commitstats/commit_stats_test.go rename to internal/commitstats/commit_test.go index e78b26e7e..88bebbd6b 100644 --- a/internal/commitstats/commit_stats_test.go +++ b/internal/commitstats/commit_test.go @@ -5,7 +5,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "gopkg.in/src-d/go-git-fixtures.v3" + fixtures "gopkg.in/src-d/go-git-fixtures.v3" "gopkg.in/src-d/go-git.v4" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/cache" @@ -43,7 +43,7 @@ func TestCalculate(t *testing.T) { fixture: fixtures.Basic().One(), to: plumbing.NewHash("b029517f6300c2da0f4b651b8642506cd6aaf45d"), expected: &CommitStats{ - Files: 2, + Files: 1, Other: 
KindStats{Additions: 22, Deletions: 0}, Total: KindStats{Additions: 22, Deletions: 0}, }, @@ -68,7 +68,7 @@ func TestCalculate(t *testing.T) { fixture: fixtures.Basic().One(), to: plumbing.NewHash("6ecf0ef2c2dffb796033e5a02219af86ec6584e5"), expected: &CommitStats{ - Files: 1, + Files: 0, }, }, "with_from": { @@ -102,31 +102,3 @@ func TestCalculate(t *testing.T) { }) } } - -func TestNewFileStats(t *testing.T) { - require := require.New(t) - - err := fixtures.Init() - require.NoError(err) - - defer func() { - err := fixtures.Clean() - require.NoError(err) - }() - - f := fixtures.Basic().One() - - r, err := git.Open(filesystem.NewStorage(f.DotGit(), cache.NewObjectLRUDefault()), nil) - require.NoError(err) - - b, err := r.BlobObject(plumbing.NewHash("9a48f23120e880dfbe41f7c9b7b708e9ee62a492")) - require.NoError(err) - - fs, err := newFileStats(b, "PHP") - require.NoError(err) - - require.Equal(17, fs["}"].Count) - require.Equal(Code, fs["}"].Kind) - require.Equal(10, fs["*/"].Count) - require.Equal(Comment, fs["*/"].Kind) -} diff --git a/internal/commitstats/common.go b/internal/commitstats/common.go new file mode 100644 index 000000000..b8a2a4885 --- /dev/null +++ b/internal/commitstats/common.go @@ -0,0 +1,29 @@ +package commitstats + +// LineKind defines the kind of a line in a file. +type LineKind int + +const ( + // Code represents a line of code. + Code LineKind = iota + 1 + // Comment represents a line of comment. + Comment + // Blank represents an empty line. + Blank + // Other represents a line from any other kind. + Other +) + +// KindStats represents the stats for a kind of lines in a file. +type KindStats struct { + // Additions number of lines added. + Additions int + // Deletions number of lines deleted. + Deletions int +} + +// Add adds the given stats to this stats. 
+func (k *KindStats) Add(add KindStats) { + k.Additions += add.Additions + k.Deletions += add.Deletions +} diff --git a/internal/commitstats/file.go b/internal/commitstats/file.go new file mode 100644 index 000000000..b288a8048 --- /dev/null +++ b/internal/commitstats/file.go @@ -0,0 +1,331 @@ +package commitstats + +import ( + "bufio" + "bytes" + "errors" + "fmt" + "io" + + "github.com/hhatto/gocloc" + "gopkg.in/src-d/enry.v1" + "gopkg.in/src-d/go-git.v4" + "gopkg.in/src-d/go-git.v4/plumbing/object" + "gopkg.in/src-d/go-git.v4/utils/binary" + "gopkg.in/src-d/go-git.v4/utils/ioutil" + "gopkg.in/src-d/go-git.v4/utils/merkletrie" +) + +// CommitFileStats represents the stats for a file in a commit. +type CommitFileStats struct { + Path string + Language string + Code KindStats + Comment KindStats + Blank KindStats + Other KindStats + Total KindStats +} + +// CalculateByFile calculates the stats for all files from a commit to another. +// If from is nil, the first parent is used. If the commit is an orphan, +// the stats are compared against an empty commit. 
+func CalculateByFile(r *git.Repository, from, to *object.Commit) ([]CommitFileStats, error) { + var err error + if to.NumParents() != 0 && from == nil { + from, err = to.Parent(0) + if err != nil { + return nil, err + } + } + + if from == nil { + return fileStatsFromCommit(to) + } + + return fileStatsFromDiff(r, from, to) +} + +func fileStatsFromCommit(c *object.Commit) ([]CommitFileStats, error) { + var result []CommitFileStats + files, err := c.Files() + if err != nil { + return nil, err + } + + err = files.ForEach(func(f *object.File) error { + lang := getLanguage(f.Name) + fi, err := blobFileStats(&f.Blob, f.Name, lang) + if err != nil { + if err == errIgnored { + return nil + } + return err + } + + stats := commitFileStatsFromFileStats(fi, f.Name, lang) + result = append(result, stats) + return nil + }) + if err != nil { + return nil, err + } + + return result, nil +} + +func commitFileStatsFromFileStats(fi fileStats, path, lang string) CommitFileStats { + stats := CommitFileStats{ + Path: path, + } + + if fi != nil { + stats.Language = lang + for _, info := range fi { + fillKindStats(&stats.Total, info) + switch info.Kind { + case Code: + fillKindStats(&stats.Code, info) + case Comment: + fillKindStats(&stats.Comment, info) + case Blank: + fillKindStats(&stats.Blank, info) + case Other: + fillKindStats(&stats.Other, info) + } + } + } + + return stats +} + +func fileStatsFromDiff(r *git.Repository, from, to *object.Commit) ([]CommitFileStats, error) { + ch, err := computeDiff(from, to) + if err != nil { + return nil, err + } + + var result []CommitFileStats + for _, change := range ch { + s, err := fileStatsFromChange(r, change) + if err != nil { + if err == errIgnored { + continue + } + return nil, err + } + + result = append(result, s) + } + + return result, nil +} + +func fileStatsFromChange(r *git.Repository, ch *object.Change) (CommitFileStats, error) { + a, err := ch.Action() + if err != nil { + return CommitFileStats{}, err + } + + var fi fileStats + 
var name string + + switch a { + case merkletrie.Delete: + name = ch.From.Name + fi, err = changeEntryFileStats(r, &ch.From) + if err != nil { + return CommitFileStats{}, err + } + case merkletrie.Insert: + name = ch.To.Name + fi, err = changeEntryFileStats(r, &ch.To) + if err != nil { + return CommitFileStats{}, err + } + case merkletrie.Modify: + src, err := changeEntryFileStats(r, &ch.From) + if err != nil { + return CommitFileStats{}, err + } + + name = ch.To.Name + dst, err := changeEntryFileStats(r, &ch.To) + if err != nil { + return CommitFileStats{}, err + } + + if src == nil { + src = make(fileStats) + } + + if dst == nil { + dst = make(fileStats) + } + + dst.sub(src) + fi = dst + } + + return commitFileStatsFromFileStats(fi, name, getLanguage(name)), nil +} + +var errIgnored = errors.New("ignored file") + +var languages = gocloc.NewDefinedLanguages() + +type fileStats map[string]*LineInfo + +// LineInfo represents the information about a single line. +type LineInfo struct { + Kind LineKind + Count int +} + +func newFileStats(f *object.Blob, lang string) (fileStats, error) { + ff := make(fileStats, 50) + + r, err := f.Reader() + if err != nil { + return ff, err + } + + defer ioutil.CheckClose(r, &err) + + l, ok := languages.Langs[lang] + if ok { + fillStatsCloc(r, l, &ff) + return ff, nil + } + + return ff, fillStatsPlaintext(r, &ff) +} + +func fillStatsCloc(r io.Reader, l *gocloc.Language, ff *fileStats) { + gocloc.AnalyzeReader("", l, r, &gocloc.ClocOptions{ + OnBlank: ff.addBlank, + OnCode: ff.addCode, + OnComment: ff.addComment, + }) +} + +func fillStatsPlaintext(r io.Reader, ff *fileStats) error { + s := bufio.NewScanner(r) + for s.Scan() { + ff.addOther(s.Text()) + } + + return s.Err() +} + +func (fi fileStats) addCode(line string) { fi.add(line, Code) } +func (fi fileStats) addComment(line string) { fi.add(line, Comment) } +func (fi fileStats) addBlank(line string) { fi.add(line, Blank) } +func (fi fileStats) addOther(line string) { fi.add(line, 
Other) } +func (fi fileStats) add(line string, k LineKind) { + if fi[line] == nil { + fi[line] = &LineInfo{} + } + + fi[line].Count++ + fi[line].Kind = k +} + +func (fi fileStats) sub(to fileStats) { + for line, i := range to { + if _, ok := fi[line]; ok { + fi[line].Count -= i.Count + } else { + fi[line] = i + fi[line].Count *= -1 + } + } +} + +func fillKindStats(ks *KindStats, info *LineInfo) { + if info.Count > 0 { + ks.Additions += info.Count + } + if info.Count < 0 { + ks.Deletions += (info.Count * -1) + } +} + +func (fi fileStats) String() string { + buf := bytes.NewBuffer(nil) + for line, i := range fi { + sign := ' ' + switch { + case i.Count > 0: + sign = '+' + case i.Count < 0: + sign = '-' + } + + fmt.Fprintf(buf, "%c [%3dx] %s\n", sign, i.Count, line) + } + + return buf.String() +} + +func blobFileStats(blob *object.Blob, filename, lang string) (fileStats, error) { + if enry.IsVendor(filename) { + return nil, errIgnored + } + + isBinary, err := isBinary(blob) + if err != nil { + return nil, err + } + + if isBinary { + return nil, nil + } + + return newFileStats(blob, lang) +} + +func getLanguage(filename string) string { + if lang, ok := enry.GetLanguageByFilename(filename); ok { + return lang + } + + if lang, ok := enry.GetLanguageByExtension(filename); ok { + return lang + } + + return "" +} + +func isBinary(b *object.Blob) (bin bool, err error) { + reader, err := b.Reader() + if err != nil { + return false, err + } + + defer ioutil.CheckClose(reader, &err) + return binary.IsBinary(reader) +} + +func computeDiff(from, to *object.Commit) (object.Changes, error) { + src, err := to.Tree() + if err != nil { + return nil, err + } + + dst, err := from.Tree() + if err != nil { + return nil, err + } + + return object.DiffTree(dst, src) +} + +func changeEntryFileStats(r *git.Repository, ch *object.ChangeEntry) (fileStats, error) { + blob, err := r.BlobObject(ch.TreeEntry.Hash) + if err != nil { + return nil, err + } + + return blobFileStats(blob, ch.Name, 
getLanguage(ch.Name)) +} diff --git a/internal/commitstats/file_test.go b/internal/commitstats/file_test.go new file mode 100644 index 000000000..d1236eae5 --- /dev/null +++ b/internal/commitstats/file_test.go @@ -0,0 +1,262 @@ +package commitstats + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + fixtures "gopkg.in/src-d/go-git-fixtures.v3" + "gopkg.in/src-d/go-git.v4" + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" + "gopkg.in/src-d/go-git.v4/plumbing/object" + "gopkg.in/src-d/go-git.v4/storage/filesystem" +) + +func TestNewFileStats(t *testing.T) { + require := require.New(t) + + err := fixtures.Init() + require.NoError(err) + + defer func() { + err := fixtures.Clean() + require.NoError(err) + }() + + f := fixtures.Basic().One() + + r, err := git.Open(filesystem.NewStorage(f.DotGit(), cache.NewObjectLRUDefault()), nil) + require.NoError(err) + + b, err := r.BlobObject(plumbing.NewHash("9a48f23120e880dfbe41f7c9b7b708e9ee62a492")) + require.NoError(err) + + fs, err := newFileStats(b, "PHP") + require.NoError(err) + + require.Equal(17, fs["}"].Count) + require.Equal(Code, fs["}"].Kind) + require.Equal(10, fs["*/"].Count) + require.Equal(Comment, fs["*/"].Kind) +} + +func TestCalculateByFile(t *testing.T) { + err := fixtures.Init() + require.NoError(t, err) + + defer func() { + err := fixtures.Clean() + require.NoError(t, err) + }() + + tests := map[string]struct { + fixture *fixtures.Fixture + from plumbing.Hash + to plumbing.Hash + expected interface{} + }{ + "basic": { + fixture: fixtures.ByURL("https://github.com/src-d/go-git.git").One(), + to: plumbing.NewHash("d2d68d3413353bd4bf20891ac1daa82cd6e00fb9"), + expected: []CommitFileStats{ + { + Path: "common_test.go", + Language: "Go", + Blank: KindStats{Deletions: 1}, + Total: KindStats{Deletions: 1}, + }, + { + Path: "core/storage.go", + Language: "Go", + Code: KindStats{Additions: 1}, + Total: KindStats{Additions: 1}, + }, 
+ { + Path: "fixtures/data/pack-a3fed42da1e8189a077c0e6846c040dcf73fc9dd.idx", + }, + { + Path: "fixtures/data/pack-a3fed42da1e8189a077c0e6846c040dcf73fc9dd.pack", + }, + { + Path: "fixtures/data/pack-c544593473465e6315ad4182d04d366c4592b829.idx", + }, + { + Path: "fixtures/data/pack-c544593473465e6315ad4182d04d366c4592b829.pack", + }, + { + Path: "fixtures/data/pack-f2e0a8889a746f7600e07d2246a2e29a72f696be.idx", + }, + { + Path: "fixtures/data/pack-f2e0a8889a746f7600e07d2246a2e29a72f696be.pack", + }, + { + Path: "fixtures/fixtures.go", + Language: "Go", + Code: KindStats{Additions: 83}, + Blank: KindStats{Additions: 19}, + Total: KindStats{Additions: 102}, + }, + { + Path: "formats/idxfile/decoder.go", + Language: "Go", + Code: KindStats{Additions: 3, Deletions: 1}, + Blank: KindStats{Deletions: 1}, + Total: KindStats{Additions: 3, Deletions: 2}, + }, + { + Path: "formats/idxfile/decoder_test.go", + Language: "Go", + Code: KindStats{Additions: 31, Deletions: 11}, + Blank: KindStats{Additions: 7}, + Total: KindStats{Additions: 38, Deletions: 11}, + }, + { + Path: "formats/idxfile/encoder.go", + Language: "Go", + Code: KindStats{Additions: 8, Deletions: 9}, + Total: KindStats{Additions: 8, Deletions: 9}, + }, + { + Path: "formats/idxfile/encoder_test.go", + Language: "Go", + Code: KindStats{Additions: 16, Deletions: 27}, + Comment: KindStats{Deletions: 0}, + Blank: KindStats{Deletions: 3}, + Other: KindStats{Deletions: 0}, + Total: KindStats{Additions: 16, + Deletions: 30}, + }, + { + Path: "formats/idxfile/fixtures/git-fixture.idx", + }, + { + Path: "formats/idxfile/idxfile.go", + Language: "Go", + Code: KindStats{Additions: 8, Deletions: 1}, + Blank: KindStats{Additions: 1}, + Total: KindStats{Additions: 9, Deletions: 1}, + }, + { + Path: "formats/packfile/decoder.go", + Language: "Go", + Code: KindStats{Additions: 56, Deletions: 70}, + Comment: KindStats{Additions: 2, Deletions: 9}, + Blank: KindStats{Deletions: 4}, + Total: KindStats{Additions: 58, Deletions: 
83}, + }, + { + Path: "formats/packfile/decoder_test.go", + Language: "Go", + Code: KindStats{Additions: 23, Deletions: 45}, + Blank: KindStats{Deletions: 3}, + Total: KindStats{Additions: 23, Deletions: 48}, + }, + { + Path: "formats/packfile/parser.go", + Language: "Go", + Code: KindStats{Additions: 53, Deletions: 15}, + Blank: KindStats{Additions: 9}, + Total: KindStats{Additions: 62, Deletions: 15}, + }, + { + Path: "formats/packfile/parser_test.go", + Language: "Go", + Code: KindStats{Additions: 91, Deletions: 59}, + Comment: KindStats{Deletions: 328}, + Blank: KindStats{Deletions: 53}, + Total: KindStats{Additions: 91, Deletions: 440}, + }, + { + Path: "storage/filesystem/internal/dotgit/dotgit.go", + Language: "Go", + Code: KindStats{Additions: 23, Deletions: 22}, + Blank: KindStats{Additions: 2}, + Total: KindStats{Additions: 25, Deletions: 22}, + }, + { + Path: "storage/filesystem/internal/index/index.go", + Language: "Go", + Code: KindStats{Additions: 8, Deletions: 4}, + Total: KindStats{Additions: 8, Deletions: 4}, + }, + { + Path: "storage/filesystem/object.go", + Language: "Go", + Code: KindStats{Additions: 3}, + Blank: KindStats{Additions: 1}, + Total: KindStats{Additions: 4}, + }, + { + Path: "storage/memory/storage.go", + Language: "Go", + Code: KindStats{Additions: 7}, + Blank: KindStats{Additions: 3}, + Total: KindStats{Additions: 10}, + }, + }, + }, + "orphan": { + fixture: fixtures.Basic().One(), + to: plumbing.NewHash("b029517f6300c2da0f4b651b8642506cd6aaf45d"), + expected: []CommitFileStats{ + { + Path: "LICENSE", + Language: "Text", + Other: KindStats{Additions: 22}, + Total: KindStats{Additions: 22}, + }, + }, + }, + "other": { + fixture: fixtures.Basic().One(), + to: plumbing.NewHash("b8e471f58bcbca63b07bda20e428190409c2db47"), + expected: []CommitFileStats{ + { + Path: "CHANGELOG", + Other: KindStats{Additions: 1}, + Total: KindStats{Additions: 1}, + }, + }, + }, + "binary": { + fixture: fixtures.Basic().One(), + to: 
plumbing.NewHash("35e85108805c84807bc66a02d91535e1e24b38b9"), + expected: []CommitFileStats{{Path: "binary.jpg"}}, + }, + "vendor": { + fixture: fixtures.Basic().One(), + to: plumbing.NewHash("6ecf0ef2c2dffb796033e5a02219af86ec6584e5"), + expected: ([]CommitFileStats)(nil), + }, + "with_from": { + fixture: fixtures.Basic().One(), + to: plumbing.NewHash("6ecf0ef2c2dffb796033e5a02219af86ec6584e5"), + from: plumbing.NewHash("6ecf0ef2c2dffb796033e5a02219af86ec6584e5"), + expected: ([]CommitFileStats)(nil), + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + require := require.New(t) + + r, err := git.Open(filesystem.NewStorage(test.fixture.DotGit(), cache.NewObjectLRUDefault()), nil) + require.NoError(err) + + to, err := r.CommitObject(test.to) + require.NoError(err) + + var from *object.Commit + if !test.from.IsZero() { + from, err = r.CommitObject(test.from) + require.NoError(err) + } + + stats, err := CalculateByFile(r, from, to) + require.NoError(err) + + assert.Equal(t, test.expected, stats) + }) + } +} diff --git a/internal/function/commit_file_stats.go b/internal/function/commit_file_stats.go new file mode 100644 index 000000000..66ca58405 --- /dev/null +++ b/internal/function/commit_file_stats.go @@ -0,0 +1,115 @@ +package function + +import ( + "fmt" + + "github.com/src-d/gitbase/internal/commitstats" + + "github.com/src-d/go-mysql-server/sql" + "gopkg.in/src-d/go-git.v4" + "gopkg.in/src-d/go-git.v4/plumbing/object" +) + +// CommitFileStats calculates the diff stats of all files for a given commit. +// Vendored files are ignored in the output of this function. +type CommitFileStats struct { + Repository sql.Expression + From sql.Expression + To sql.Expression +} + +// NewCommitFileStats creates a new COMMIT_FILE_STATS function. 
+func NewCommitFileStats(args ...sql.Expression) (sql.Expression, error) { + var f CommitFileStats + switch len(args) { + case 2: + f.Repository, f.To = args[0], args[1] + case 3: + f.Repository, f.From, f.To = args[0], args[1], args[2] + default: + return nil, sql.ErrInvalidArgumentNumber.New("COMMIT_FILE_STATS", "2 or 3", len(args)) + } + + return &f, nil +} + +func (f *CommitFileStats) String() string { + if f.From == nil { + return fmt.Sprintf("commit_file_stats(%s, %s)", f.Repository, f.To) + } + + return fmt.Sprintf("commit_file_stats(%s, %s, %s)", f.Repository, f.From, f.To) +} + +// Type implements the Expression interface. +func (CommitFileStats) Type() sql.Type { + return sql.Array(sql.JSON) +} + +// TransformUp implements the Expression interface. +func (f *CommitFileStats) TransformUp(fn sql.TransformExprFunc) (sql.Expression, error) { + repo, err := f.Repository.TransformUp(fn) + if err != nil { + return nil, err + } + + to, err := f.To.TransformUp(fn) + if err != nil { + return nil, err + } + + if f.From == nil { + return fn(&CommitFileStats{Repository: repo, To: to}) + } + + from, err := f.From.TransformUp(fn) + if err != nil { + return nil, err + } + + return fn(&CommitFileStats{Repository: repo, From: from, To: to}) +} + +// Children implements the Expression interface. +func (f *CommitFileStats) Children() []sql.Expression { + if f.From == nil { + return []sql.Expression{f.Repository, f.To} + } + + return []sql.Expression{f.Repository, f.From, f.To} +} + +// IsNullable implements the Expression interface. +func (*CommitFileStats) IsNullable() bool { + return true +} + +// Resolved implements the Expression interface. +func (f *CommitFileStats) Resolved() bool { + return f.Repository.Resolved() && + f.To.Resolved() && + (f.From == nil || f.From.Resolved()) +} + +// Eval implements the Expression interface. 
+func (f *CommitFileStats) Eval(ctx *sql.Context, row sql.Row) (interface{}, error) { + return evalStatsFunc( + ctx, + "commit_file_stats", + row, + f.Repository, f.From, f.To, + func(r *git.Repository, from, to *object.Commit) (interface{}, error) { + stats, err := commitstats.CalculateByFile(r, from, to) + if err != nil { + return nil, err + } + + // Since the type is an array, it must be converted to []interface{}. + var result = make([]interface{}, len(stats)) + for i, s := range stats { + result[i] = s + } + return result, nil + }, + ) +} diff --git a/internal/function/commit_file_stats_test.go b/internal/function/commit_file_stats_test.go new file mode 100644 index 000000000..0cc3d5e9e --- /dev/null +++ b/internal/function/commit_file_stats_test.go @@ -0,0 +1,91 @@ +package function + +import ( + "context" + "testing" + + "github.com/src-d/gitbase" + "github.com/src-d/gitbase/internal/commitstats" + "github.com/stretchr/testify/require" + + "github.com/src-d/go-mysql-server/sql" + "github.com/src-d/go-mysql-server/sql/expression" + fixtures "gopkg.in/src-d/go-git-fixtures.v3" + "gopkg.in/src-d/go-git.v4/plumbing/cache" +) + +func TestCommitFileStats(t *testing.T) { + require.NoError(t, fixtures.Init()) + defer func() { + require.NoError(t, fixtures.Clean()) + }() + + path := fixtures.ByTag("worktree").One().Worktree().Root() + + pool := gitbase.NewRepositoryPool(cache.DefaultMaxSize) + require.NoError(t, pool.AddGitWithID("worktree", path)) + + session := gitbase.NewSession(pool) + ctx := sql.NewContext(context.TODO(), sql.WithSession(session)) + + testCases := []struct { + name string + repo sql.Expression + from sql.Expression + to sql.Expression + row sql.Row + expected interface{} + }{ + { + name: "init commit", + repo: expression.NewGetField(0, sql.Text, "repository_id", false), + from: nil, + to: expression.NewGetField(1, sql.Text, "commit_hash", false), + row: sql.NewRow("worktree", "b029517f6300c2da0f4b651b8642506cd6aaf45d"), + expected: 
[]interface{}{ + commitstats.CommitFileStats{ + Path: "LICENSE", + Language: "Text", + Other: commitstats.KindStats{Additions: 22}, + Total: commitstats.KindStats{Additions: 22}, + }, + }, + }, + { + name: "invalid repository id", + repo: expression.NewGetField(0, sql.Text, "repository_id", false), + from: nil, + to: expression.NewGetField(1, sql.Text, "commit_hash", false), + row: sql.NewRow("foobar", "b029517f6300c2da0f4b651b8642506cd6aaf45d"), + expected: nil, + }, + { + name: "invalid to", + repo: expression.NewGetField(0, sql.Text, "repository_id", false), + from: nil, + to: expression.NewGetField(1, sql.Text, "commit_hash", false), + row: sql.NewRow("worktree", "foobar"), + expected: nil, + }, + { + name: "invalid from", + repo: expression.NewGetField(0, sql.Text, "repository_id", false), + from: expression.NewGetField(2, sql.Text, "commit_hash", false), + to: expression.NewGetField(1, sql.Text, "commit_hash", false), + row: sql.NewRow("worktree", "b029517f6300c2da0f4b651b8642506cd6aaf45d", "foobar"), + expected: nil, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + diff, err := NewCommitFileStats(tc.repo, tc.from, tc.to) + require.NoError(t, err) + + result, err := diff.Eval(ctx, tc.row) + require.NoError(t, err) + + require.EqualValues(t, tc.expected, result) + }) + } +} diff --git a/internal/function/commit_stats.go b/internal/function/commit_stats.go index f5f02870e..735030e37 100644 --- a/internal/function/commit_stats.go +++ b/internal/function/commit_stats.go @@ -8,11 +8,13 @@ import ( "github.com/src-d/gitbase/internal/commitstats" "github.com/src-d/go-mysql-server/sql" + "gopkg.in/src-d/go-git.v4" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/object" ) -// CommitStats calculates the diff stats for a given commit. +// CommitStats calculates the diff stats for a given commit. Vendored files +// are completely ignored for the output of this function. 
type CommitStats struct { Repository sql.Expression From sql.Expression @@ -94,51 +96,27 @@ func (f *CommitStats) Resolved() bool { // Eval implements the Expression interface. func (f *CommitStats) Eval(ctx *sql.Context, row sql.Row) (interface{}, error) { - span, ctx := ctx.Span("gitbase.CommitStats") - defer span.Finish() - - r, err := f.resolveRepo(ctx, row) - if err != nil { - ctx.Warn(0, "commit_stats: unable to resolve repository") - logrus.WithField("err", err).Error("commit_stats: unable to resolve repository") - return nil, nil - } - - log := logrus.WithField("repository", r) - - to, err := f.resolveCommit(ctx, r, row, f.To) - if err != nil { - ctx.Warn(0, "commit_stats: unable to resolve 'to' commit of repository: %v", r) - log.WithField("err", err).Error("commit_stats: unable to resolve 'to' commit") - return nil, nil - } - - from, err := f.resolveCommit(ctx, r, row, f.From) - if err != nil { - ctx.Warn(0, "commit_stats: unable to resolve 'from' commit of repository: %v", r) - log.WithField("err", err).Error("commit_stats: unable to resolve from commit") - return nil, nil - } - - result, err := commitstats.Calculate(r.Repository, from, to) - if err != nil { - ctx.Warn(0, "commit_stats: unable to calculate for repository: %v, from: %v, to: %v", r, from, to) - log.WithFields(logrus.Fields{ - "err": err, - "from": from, - "to": to, - }).Error("commit_stats: unable to calculate") - return nil, nil - } - - return result, nil + return evalStatsFunc( + ctx, + "commit_stats", + row, + f.Repository, f.From, f.To, + func(r *git.Repository, from, to *object.Commit) (interface{}, error) { + return commitstats.Calculate(r, from, to) + }, + ) } -func (f *CommitStats) resolveRepo(ctx *sql.Context, r sql.Row) (*gitbase.Repository, error) { - repoID, err := exprToString(ctx, f.Repository, r) +func resolveRepo( + ctx *sql.Context, + r sql.Row, + repo sql.Expression, +) (*gitbase.Repository, error) { + repoID, err := exprToString(ctx, repo, r) if err != nil { return nil, 
err } + s, ok := ctx.Session.(*gitbase.Session) if !ok { return nil, gitbase.ErrInvalidGitbaseSession.New(ctx.Session) @@ -146,8 +124,11 @@ func (f *CommitStats) resolveRepo(ctx *sql.Context, r sql.Row) (*gitbase.Reposit return s.Pool.GetRepo(repoID) } -func (f *CommitStats) resolveCommit( - ctx *sql.Context, r *gitbase.Repository, row sql.Row, e sql.Expression, +func resolveCommit( + ctx *sql.Context, + r *gitbase.Repository, + row sql.Row, + e sql.Expression, ) (*object.Commit, error) { str, err := exprToString(ctx, e, row) if err != nil { @@ -166,3 +147,50 @@ func (f *CommitStats) resolveCommit( return r.CommitObject(*commitHash) } + +func evalStatsFunc( + ctx *sql.Context, + name string, + row sql.Row, + repoExpr, fromExpr, toExpr sql.Expression, + fn func(r *git.Repository, from, to *object.Commit) (interface{}, error), +) (interface{}, error) { + span, ctx := ctx.Span("gitbase." + name) + defer span.Finish() + + r, err := resolveRepo(ctx, row, repoExpr) + if err != nil { + ctx.Warn(0, name+": unable to resolve repository") + logrus.WithField("err", err).Error(name + ": unable to resolve repository") + return nil, nil + } + + log := logrus.WithField("repository", r) + + to, err := resolveCommit(ctx, r, row, toExpr) + if err != nil { + ctx.Warn(0, name+": unable to resolve 'to' commit of repository: %v", r) + log.WithField("err", err).Error(name + ": unable to resolve 'to' commit") + return nil, nil + } + + from, err := resolveCommit(ctx, r, row, fromExpr) + if err != nil { + ctx.Warn(0, name+": unable to resolve 'from' commit of repository: %v", r) + log.WithField("err", err).Error(name + ": unable to resolve from commit") + return nil, nil + } + + result, err := fn(r.Repository, from, to) + if err != nil { + ctx.Warn(0, name+": unable to calculate for repository: %v, from: %v, to: %v", r, from, to) + log.WithFields(logrus.Fields{ + "err": err, + "from": from, + "to": to, + }).Error(name + ": unable to calculate") + return nil, nil + } + + return result, nil 
+} diff --git a/internal/function/commit_stats_test.go b/internal/function/commit_stats_test.go index 00e0a20f8..af99a5650 100644 --- a/internal/function/commit_stats_test.go +++ b/internal/function/commit_stats_test.go @@ -43,7 +43,7 @@ func TestCommitStatsEval(t *testing.T) { to: expression.NewGetField(1, sql.Text, "commit_hash", false), row: sql.NewRow("worktree", "b029517f6300c2da0f4b651b8642506cd6aaf45d"), expected: &commitstats.CommitStats{ - Files: 2, + Files: 1, Other: commitstats.KindStats{Additions: 22, Deletions: 0}, Total: commitstats.KindStats{Additions: 22, Deletions: 0}, }, diff --git a/internal/function/registry.go b/internal/function/registry.go index ef49567a7..f24038153 100644 --- a/internal/function/registry.go +++ b/internal/function/registry.go @@ -5,6 +5,7 @@ import "github.com/src-d/go-mysql-server/sql" // Functions for gitbase queries. var Functions = []sql.Function{ sql.FunctionN{Name: "commit_stats", Fn: NewCommitStats}, + sql.FunctionN{Name: "commit_file_stats", Fn: NewCommitFileStats}, sql.Function1{Name: "is_tag", Fn: NewIsTag}, sql.Function1{Name: "is_remote", Fn: NewIsRemote}, sql.FunctionN{Name: "language", Fn: NewLanguage},