Use git log name-status in get last commit (go-gitea#16059)

* Improve get last commit using git log --name-status

git log --name-status -c provides information about the diff between a commit and its parents. Using this and adjusting the algorithm to use the first change to a path allows for a much faster generation of commit info.

There is a subtle change in the results generated but this will cause the results to more closely match those from elsewhere.

Signed-off-by: Andrew Thornton <art27@cantab.net>
Co-authored-by: 6543 <6543@obermui.de>
Co-authored-by: techknowlogick <techknowlogick@gitea.io>
Co-authored-by: Lauris BH <lauris@nix.lv>
kitspace · Aug 10, 2021 · b26580f · b26580f
1 parent 9cbf172
commit b26580f
Show file tree

Hide file tree

Showing 40 changed files with 2,538 additions and 295 deletions.
diff --git a/go.mod b/go.mod
@@ -30,6 +30,8 @@ require (
 	github.com/couchbase/goutils v0.0.0-20210118111533-e33d3ffb5401 // indirect
 	github.com/denisenkom/go-mssqldb v0.10.0
 	github.com/dgrijalva/jwt-go v3.2.0+incompatible
+	github.com/djherbis/buffer v1.2.0
+	github.com/djherbis/nio/v3 v3.0.1
 	github.com/dustin/go-humanize v1.0.0
 	github.com/editorconfig/editorconfig-core-go/v2 v2.4.2
 	github.com/emirpasic/gods v1.12.0

diff --git a/go.sum b/go.sum
@@ -244,6 +244,11 @@ github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZm
 github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
 github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
 github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no=
+github.com/djherbis/buffer v1.1.0/go.mod h1:VwN8VdFkMY0DCALdY8o00d3IZ6Amz/UNVMWcSaJT44o=
+github.com/djherbis/buffer v1.2.0 h1:PH5Dd2ss0C7CRRhQCZ2u7MssF+No9ide8Ye71nPHcrQ=
+github.com/djherbis/buffer v1.2.0/go.mod h1:fjnebbZjCUpPinBRD+TDwXSOeNQ7fPQWLfGQqiAiUyE=
+github.com/djherbis/nio/v3 v3.0.1 h1:6wxhnuppteMa6RHA4L81Dq7ThkZH8SwnDzXDYy95vB4=
+github.com/djherbis/nio/v3 v3.0.1/go.mod h1:Ng4h80pbZFMla1yKzm61cF0tqqilXZYrogmWgZxOcmg=
 github.com/dlclark/regexp2 v1.1.6/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
 github.com/dlclark/regexp2 v1.2.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
 github.com/dlclark/regexp2 v1.4.0 h1:F1rxgk7p4uKjwIQxBs9oAXe5CqrXlCduYEJvrF4u93E=

diff --git a/modules/git/batch_reader.go b/modules/git/batch_reader.go
@@ -11,6 +11,9 @@ import (
 	"math"
 	"strconv"
 	"strings"
+
+	"github.com/djherbis/buffer"
+	"github.com/djherbis/nio/v3"
 )
 
 // WriteCloserError wraps an io.WriteCloser with an additional CloseWithError function
@@ -42,7 +45,7 @@ func CatFileBatchCheck(repoPath string) (WriteCloserError, *bufio.Reader, func()
 		}
 	}()
 
-	// For simplicities sake we'll us a buffered reader to read from the cat-file --batch
+	// For simplicities sake we'll use a buffered reader to read from the cat-file --batch-check
 	batchReader := bufio.NewReader(batchStdoutReader)
 
 	return batchStdinWriter, batchReader, cancel
@@ -53,7 +56,7 @@ func CatFileBatch(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
 	// We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
 	// so let's create a batch stdin and stdout
 	batchStdinReader, batchStdinWriter := io.Pipe()
-	batchStdoutReader, batchStdoutWriter := io.Pipe()
+	batchStdoutReader, batchStdoutWriter := nio.Pipe(buffer.New(32 * 1024))
 	cancel := func() {
 		_ = batchStdinReader.Close()
 		_ = batchStdinWriter.Close()
@@ -74,7 +77,7 @@ func CatFileBatch(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
 	}()
 
 	// For simplicities sake we'll us a buffered reader to read from the cat-file --batch
-	batchReader := bufio.NewReader(batchStdoutReader)
+	batchReader := bufio.NewReaderSize(batchStdoutReader, 32*1024)
 
 	return batchStdinWriter, batchReader, cancel
 }
@@ -84,22 +87,31 @@ func CatFileBatch(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
 // <sha> SP <type> SP <size> LF
 // sha is a 40byte not 20byte here
 func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err error) {
-	sha, err = rd.ReadBytes(' ')
+	typ, err = rd.ReadString('\n')
 	if err != nil {
 		return
 	}
-	sha = sha[:len(sha)-1]
-
-	typ, err = rd.ReadString('\n')
-	if err != nil {
+	if len(typ) == 1 {
+		typ, err = rd.ReadString('\n')
+		if err != nil {
+			return
+		}
+	}
+	idx := strings.IndexByte(typ, ' ')
+	if idx < 0 {
+		log("missing space typ: %s", typ)
+		err = ErrNotExist{ID: string(sha)}
 		return
 	}
+	sha = []byte(typ[:idx])
+	typ = typ[idx+1:]
 
-	idx := strings.Index(typ, " ")
+	idx = strings.IndexByte(typ, ' ')
 	if idx < 0 {
 		err = ErrNotExist{ID: string(sha)}
 		return
 	}
+
 	sizeStr := typ[idx+1 : len(typ)-1]
 	typ = typ[:idx]
 
@@ -130,7 +142,7 @@ headerLoop:
 	}
 
 	// Discard the rest of the tag
-	discard := size - n
+	discard := size - n + 1
 	for discard > math.MaxInt32 {
 		_, err := rd.Discard(math.MaxInt32)
 		if err != nil {
@@ -200,85 +212,42 @@ func To40ByteSHA(sha, out []byte) []byte {
 	return out
 }
 
-// ParseTreeLineSkipMode reads an entry from a tree in a cat-file --batch stream
-// This simply skips the mode - saving a substantial amount of time and carefully avoids allocations - except where fnameBuf is too small.
+// ParseTreeLine reads an entry from a tree in a cat-file --batch stream
+// This carefully avoids allocations - except where fnameBuf is too small.
 // It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations
 //
 // Each line is composed of:
 // <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <20-byte SHA>
 //
 // We don't attempt to convert the 20-byte SHA to 40-byte SHA to save a lot of time
-func ParseTreeLineSkipMode(rd *bufio.Reader, fnameBuf, shaBuf []byte) (fname, sha []byte, n int, err error) {
+func ParseTreeLine(rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) {
 	var readBytes []byte
-	// Skip the Mode
-	readBytes, err = rd.ReadSlice(' ') // NB: DOES NOT ALLOCATE SIMPLY RETURNS SLICE WITHIN READER BUFFER
-	if err != nil {
-		return
-	}
-	n += len(readBytes)
 
-	// Deal with the fname
+	// Read the Mode & fname
 	readBytes, err = rd.ReadSlice('\x00')
-	copy(fnameBuf, readBytes)
-	if len(fnameBuf) > len(readBytes) {
-		fnameBuf = fnameBuf[:len(readBytes)] // cut the buf the correct size
-	} else {
-		fnameBuf = append(fnameBuf, readBytes[len(fnameBuf):]...) // extend the buf and copy in the missing bits
-	}
-	for err == bufio.ErrBufferFull { // Then we need to read more
-		readBytes, err = rd.ReadSlice('\x00')
-		fnameBuf = append(fnameBuf, readBytes...) // there is little point attempting to avoid allocations here so just extend
-	}
-	n += len(fnameBuf)
 	if err != nil {
 		return
 	}
-	fnameBuf = fnameBuf[:len(fnameBuf)-1] // Drop the terminal NUL
-	fname = fnameBuf                      // set the returnable fname to the slice
-
-	// Now deal with the 20-byte SHA
-	idx := 0
-	for idx < 20 {
-		read := 0
-		read, err = rd.Read(shaBuf[idx:20])
-		n += read
-		if err != nil {
-			return
-		}
-		idx += read
-	}
-	sha = shaBuf
-	return
-}
-
-// ParseTreeLine reads an entry from a tree in a cat-file --batch stream
-// This carefully avoids allocations - except where fnameBuf is too small.
-// It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations
-//
-// Each line is composed of:
-// <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <20-byte SHA>
-//
-// We don't attempt to convert the 20-byte SHA to 40-byte SHA to save a lot of time
-func ParseTreeLine(rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) {
-	var readBytes []byte
+	idx := bytes.IndexByte(readBytes, ' ')
+	if idx < 0 {
+		log("missing space in readBytes ParseTreeLine: %s", readBytes)
 
-	// Read the Mode
-	readBytes, err = rd.ReadSlice(' ')
-	if err != nil {
+		err = &ErrNotExist{}
 		return
 	}
-	n += len(readBytes)
-	copy(modeBuf, readBytes)
-	if len(modeBuf) > len(readBytes) {
-		modeBuf = modeBuf[:len(readBytes)]
-	} else {
-		modeBuf = append(modeBuf, readBytes[len(modeBuf):]...)
 
+	n += idx + 1
+	copy(modeBuf, readBytes[:idx])
+	if len(modeBuf) >= idx {
+		modeBuf = modeBuf[:idx]
+	} else {
+		modeBuf = append(modeBuf, readBytes[len(modeBuf):idx]...)
 	}
-	mode = modeBuf[:len(modeBuf)-1] // Drop the SP
+	mode = modeBuf
+
+	readBytes = readBytes[idx+1:]
 
 	// Deal with the fname
-	readBytes, err = rd.ReadSlice('\x00')
 	copy(fnameBuf, readBytes)
 	if len(fnameBuf) > len(readBytes) {
 		fnameBuf = fnameBuf[:len(readBytes)]
@@ -297,7 +266,7 @@ func ParseTreeLine(rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fn
 	fname = fnameBuf
 
 	// Deal with the 20-byte SHA
-	idx := 0
+	idx = 0
 	for idx < 20 {
 		read := 0
 		read, err = rd.Read(shaBuf[idx:20])