diff --git a/client/web/src/featureFlags/featureFlags.ts b/client/web/src/featureFlags/featureFlags.ts index 0b841f94fcb0..2bbd69b40129 100644 --- a/client/web/src/featureFlags/featureFlags.ts +++ b/client/web/src/featureFlags/featureFlags.ts @@ -15,6 +15,7 @@ export type FeatureFlagName = | 'admin-analytics-cache-disabled' | 'search-input-show-history' | 'user-management-disabled' + | 'enable-streaming-git-blame' interface OrgFlagOverride { orgID: string diff --git a/client/web/src/repo/blame/useBlameHunks.ts b/client/web/src/repo/blame/useBlameHunks.ts index 3e7df261792b..e74af2c6e264 100644 --- a/client/web/src/repo/blame/useBlameHunks.ts +++ b/client/web/src/repo/blame/useBlameHunks.ts @@ -1,9 +1,10 @@ import { useMemo } from 'react' +import { fetchEventSource } from '@microsoft/fetch-event-source' import { formatDistanceStrict } from 'date-fns' import { truncate } from 'lodash' import { Observable, of } from 'rxjs' -import { map } from 'rxjs/operators' +import { map, throttleTime } from 'rxjs/operators' import { memoizeObservable } from '@sourcegraph/common' import { dataOrThrowErrors, gql } from '@sourcegraph/http-client' @@ -11,6 +12,7 @@ import { makeRepoURI } from '@sourcegraph/shared/src/util/url' import { useObservable } from '@sourcegraph/wildcard' import { requestGraphQL } from '../../backend/graphql' +import { useFeatureFlag } from '../../featureFlags/useFeatureFlag' import { GitBlameResult, GitBlameVariables } from '../../graphql-operations' import { useBlameVisibility } from './useBlameVisibility' @@ -24,20 +26,45 @@ interface BlameHunkDisplayInfo { message: string } -export type BlameHunk = NonNullable< - NonNullable['commit']>['blob'] ->['blame'][number] & { displayInfo: BlameHunkDisplayInfo } +export interface BlameHunk { + startLine: number + endLine: number + message: string + rev: string + author: { + date: string + person: { + email: string + displayName: string + user: + | undefined + | null + | { + username: string + } + } + } + commit: { + url: string + parents: { + oid: string + }[] + } + displayInfo: BlameHunkDisplayInfo +} -const fetchBlame = memoizeObservable( +const fetchBlameViaGraphQL = memoizeObservable( ({ repoName, revision, filePath, + sourcegraphURL, }: { repoName: string revision: string filePath: string - }): Observable[] | undefined> => + sourcegraphURL: string + }): Observable<{ current: BlameHunk[] | undefined }> => requestGraphQL( gql` query GitBlame($repo: String!, $rev: String!, $path: String!) { @@ -74,19 +101,98 @@ const fetchBlame = memoizeObservable( { repo: repoName, rev: revision, path: filePath } ).pipe( map(dataOrThrowErrors), - map(({ repository }) => repository?.commit?.blob?.blame) + map(({ repository }) => repository?.commit?.blob?.blame), + map(hunks => (hunks ? hunks.map(blame => addDisplayInfoForHunk(blame, sourcegraphURL)) : undefined)), + map(hunks => ({ current: hunks })) ), makeRepoURI ) +interface RawStreamHunk { + author: { + Name: string + Email: string + Date: string + } + commit: { + parents: string[] + url: string + } + commitID: string + endLine: number + startLine: number + filename: string + message: string +} + +const fetchBlameViaStreaming = memoizeObservable( + ({ + repoName, + revision, + filePath, + sourcegraphURL, + }: { + repoName: string + revision: string + filePath: string + sourcegraphURL: string + }): Observable<{ current: BlameHunk[] | undefined }> => + new Observable<{ current: BlameHunk[] | undefined }>(subscriber => { + const assembledHunks: BlameHunk[] = [] + const repoAndRevisionPath = `/${repoName}${revision ? `@${revision}` : ''}` + fetchEventSource(`/.api/blame${repoAndRevisionPath}/stream/${filePath}`, { + method: 'GET', + headers: { + 'X-Requested-With': 'Sourcegraph', + 'X-Sourcegraph-Should-Trace': new URLSearchParams(window.location.search).get('trace') || 'false', + }, + onmessage(event) { + if (event.event === 'hunk') { + const rawHunks: RawStreamHunk[] = JSON.parse(event.data) + for (const rawHunk of rawHunks) { + const hunk: Omit = { + startLine: rawHunk.startLine, + endLine: rawHunk.endLine, + message: rawHunk.message, + rev: rawHunk.commitID, + author: { + date: rawHunk.author.Date, + person: { + email: rawHunk.author.Email, + displayName: rawHunk.author.Name, + user: null, + }, + }, + commit: { + url: rawHunk.commit.url, + parents: rawHunk.commit.parents ? rawHunk.commit.parents.map(oid => ({ oid })) : [], + }, + } + assembledHunks.push(addDisplayInfoForHunk(hunk, sourcegraphURL)) + } + subscriber.next({ current: assembledHunks }) + } + }, + onerror(event) { + // eslint-disable-next-line no-console + console.error(event) + }, + }).then( + () => subscriber.complete(), + error => subscriber.error(error) + ) + // Throttle the results to avoid re-rendering the blame sidebar for every hunk + }).pipe(throttleTime(1000, undefined, { leading: true, trailing: true })), + makeRepoURI +) + /** * Get display info shared between status bar items and text document decorations. */ -const getDisplayInfoFromHunk = ( - { author, commit, message }: Omit, - sourcegraphURL: string, - now: number -): BlameHunkDisplayInfo => { +const addDisplayInfoForHunk = (hunk: Omit, sourcegraphURL: string): BlameHunk => { + const now = Date.now() + const { author, commit, message } = hunk + const displayName = truncate(author.person.displayName, { length: 25 }) const username = author.person.user ? `(${author.person.user.username}) ` : '' const dateString = formatDistanceStrict(new Date(author.date), now, { addSuffix: true }) @@ -94,7 +200,7 @@ const getDisplayInfoFromHunk = ( const linkURL = new URL(commit.url, sourcegraphURL).href const content = `${dateString} • ${username}${displayName} [${truncate(message, { length: 45 })}]` - return { + ;(hunk as BlameHunk).displayInfo = { displayName, username, dateString, @@ -102,37 +208,44 @@ const getDisplayInfoFromHunk = ( linkURL, message: content, } + return hunk as BlameHunk } +/** + * For performance reasons, the hunks array can be mutated in place. To still be + * able to propagate updates accordingly, this is wrapped in a ref object that + * can be recreated whenever we emit new values. + */ export const useBlameHunks = ( { repoName, revision, filePath, + enableCodeMirror, }: { repoName: string revision: string filePath: string + enableCodeMirror: boolean }, sourcegraphURL: string -): BlameHunk[] | undefined => { +): { current: BlameHunk[] | undefined } => { + const [enableStreamingGitBlame, status] = useFeatureFlag('enable-streaming-git-blame') + const [isBlameVisible] = useBlameVisibility() + const shouldFetchBlame = isBlameVisible && status !== 'initial' + const hunks = useObservable( - useMemo(() => (isBlameVisible ? fetchBlame({ revision, repoName, filePath }) : of(undefined)), [ - isBlameVisible, - revision, - repoName, - filePath, - ]) + useMemo( + () => + shouldFetchBlame + ? enableCodeMirror && enableStreamingGitBlame + ? fetchBlameViaStreaming({ revision, repoName, filePath, sourcegraphURL }) + : fetchBlameViaGraphQL({ revision, repoName, filePath, sourcegraphURL }) + : of({ current: undefined }), + [shouldFetchBlame, enableCodeMirror, enableStreamingGitBlame, revision, repoName, filePath, sourcegraphURL] + ) ) - const hunksWithDisplayInfo = useMemo(() => { - const now = Date.now() - return hunks?.map(hunk => ({ - ...hunk, - displayInfo: getDisplayInfoFromHunk(hunk, sourcegraphURL, now), - })) - }, [hunks, sourcegraphURL]) - - return hunksWithDisplayInfo + return hunks || { current: undefined } } diff --git a/client/web/src/repo/blob/BlameColumn.tsx b/client/web/src/repo/blob/BlameColumn.tsx index d04619237815..ab4a993f22bb 100644 --- a/client/web/src/repo/blob/BlameColumn.tsx +++ b/client/web/src/repo/blob/BlameColumn.tsx @@ -12,7 +12,7 @@ import styles from './BlameColumn.module.scss' interface BlameColumnProps { isBlameVisible?: boolean - blameHunks?: BlameHunk[] + blameHunks?: { current: BlameHunk[] | undefined } codeViewElements: ReplaySubject history: History } @@ -98,7 +98,7 @@ export const BlameColumn = React.memo(({ isBlameVisible, codeV } } - const currentLineDecorations = blameHunks?.find(hunk => hunk.startLine - 1 === index) + const currentLineDecorations = blameHunks?.current?.find(hunk => hunk.startLine - 1 === index) // store created cell and corresponding blame hunk (or undefined if no blame hunk) addedCells.push([cell, currentLineDecorations]) diff --git a/client/web/src/repo/blob/Blob.tsx b/client/web/src/repo/blob/Blob.tsx index 487d16669f11..a899111d37b9 100644 --- a/client/web/src/repo/blob/Blob.tsx +++ b/client/web/src/repo/blob/Blob.tsx @@ -140,7 +140,7 @@ export interface BlobProps ariaLabel?: string isBlameVisible?: boolean - blameHunks?: BlameHunk[] + blameHunks?: { current: BlameHunk[] | undefined } } export interface BlobInfo extends AbsoluteRepoFile, ModeSpec { diff --git a/client/web/src/repo/blob/BlobPage.tsx b/client/web/src/repo/blob/BlobPage.tsx index ea0ede902524..7aed0fdfdf2a 100644 --- a/client/web/src/repo/blob/BlobPage.tsx +++ b/client/web/src/repo/blob/BlobPage.tsx @@ -330,7 +330,10 @@ export const BlobPage: React.FunctionComponent = props => { isBlameVisible, ]) const blameDecorations = useMemo( - () => (isBlameVisible && blameHunks ? [showGitBlameDecorations.of(blameHunks)] : []), + () => (isBlameVisible && blameHunks?.current ? [showGitBlameDecorations.of(blameHunks.current)] : []), [isBlameVisible, blameHunks] ) @@ -269,7 +269,7 @@ export const Blob: React.FunctionComponent = props => { }, [blameVisibility]) // Update blame decorations - useEffect(() => { + useLayoutEffect(() => { if (editor) { editor.dispatch({ effects: blameDecorationsCompartment.reconfigure(blameDecorations) }) } diff --git a/client/web/src/repo/blob/codemirror/blame-decorations.tsx b/client/web/src/repo/blob/codemirror/blame-decorations.tsx index 0d45257ebf7b..88707cec4f4e 100644 --- a/client/web/src/repo/blob/codemirror/blame-decorations.tsx +++ b/client/web/src/repo/blob/codemirror/blame-decorations.tsx @@ -111,14 +111,18 @@ export const showGitBlameDecorations = Facet.define({ ViewPlugin.fromClass( class { public decorations: DecorationSet + private previousHunkLength = -1 constructor(view: EditorView) { this.decorations = this.computeDecorations(view, facet) } public update(update: ViewUpdate): void { - if (update.docChanged || update.viewportChanged) { + const hunks = update.view.state.facet(facet) + + if (update.docChanged || update.viewportChanged || this.previousHunkLength !== hunks.length) { this.decorations = this.computeDecorations(update.view, facet) + this.previousHunkLength = update.state.facet(facet).length } } diff --git a/cmd/frontend/internal/httpapi/httpapi.go b/cmd/frontend/internal/httpapi/httpapi.go index 1a750fee0dd0..a7a0fb12b297 100644 --- a/cmd/frontend/internal/httpapi/httpapi.go +++ b/cmd/frontend/internal/httpapi/httpapi.go @@ -127,6 +127,9 @@ func NewHandler( // Return the minimum src-cli version that's compatible with this instance m.Get(apirouter.SrcCli).Handler(trace.Route(newSrcCliVersionHandler(logger))) + gsClient := gitserver.NewClient(db) + m.Get(apirouter.GitBlameStream).Handler(trace.Route(handleStreamBlame(logger, db, gsClient))) + // Set up the src-cli version cache handler (this will effectively be a // no-op anywhere other than dot-com). m.Get(apirouter.SrcCliVersionCache).Handler(trace.Route(releasecache.NewHandler(logger))) diff --git a/cmd/frontend/internal/httpapi/router/router.go b/cmd/frontend/internal/httpapi/router/router.go index 3d1a5870f715..5b239ea106cb 100644 --- a/cmd/frontend/internal/httpapi/router/router.go +++ b/cmd/frontend/internal/httpapi/router/router.go @@ -11,8 +11,9 @@ const ( LSIFUpload = "lsif.upload" GraphQL = "graphql" - SearchStream = "search.stream" - ComputeStream = "compute.stream" + SearchStream = "search.stream" + ComputeStream = "compute.stream" + GitBlameStream = "git.blame.stream" SrcCli = "src-cli" SrcCliVersionCache = "src-cli.version-cache" @@ -69,6 +70,7 @@ func New(base *mux.Router) *mux.Router { base.Path("/lsif/upload").Methods("POST").Name(LSIFUpload) base.Path("/search/stream").Methods("GET").Name(SearchStream) base.Path("/compute/stream").Methods("GET", "POST").Name(ComputeStream) + base.Path("/blame/" + routevar.Repo + routevar.RepoRevSuffix + "/stream/{Path:.*}").Methods("GET").Name(GitBlameStream) base.Path("/src-cli/versions/{rest:.*}").Methods("GET", "POST").Name(SrcCliVersionCache) base.Path("/src-cli/{rest:.*}").Methods("GET").Name(SrcCli) diff --git a/cmd/frontend/internal/httpapi/stream_blame.go b/cmd/frontend/internal/httpapi/stream_blame.go new file mode 100644 index 000000000000..3238a06a844a --- /dev/null +++ b/cmd/frontend/internal/httpapi/stream_blame.go @@ -0,0 +1,168 @@ +package httpapi + +import ( + "fmt" + "html" + "net/http" + "strings" + + "github.com/gorilla/mux" + + otlog "github.com/opentracing/opentracing-go/log" + + "github.com/sourcegraph/log" + "github.com/sourcegraph/sourcegraph/cmd/frontend/internal/handlerutil" + "github.com/sourcegraph/sourcegraph/internal/api" + "github.com/sourcegraph/sourcegraph/internal/authz" + "github.com/sourcegraph/sourcegraph/internal/database" + "github.com/sourcegraph/sourcegraph/internal/errcode" + "github.com/sourcegraph/sourcegraph/internal/featureflag" + "github.com/sourcegraph/sourcegraph/internal/gitserver" + "github.com/sourcegraph/sourcegraph/internal/gitserver/gitdomain" + streamhttp "github.com/sourcegraph/sourcegraph/internal/search/streaming/http" + "github.com/sourcegraph/sourcegraph/internal/trace" + "github.com/sourcegraph/sourcegraph/lib/errors" +) + +// handleStreamBlame returns a HTTP handler that streams back the results of running +// git blame with the --incremental flag. It will stream back to the client the most +// recent hunks first and will gradually reach the oldests, or not if we timeout +// before that. +func handleStreamBlame(logger log.Logger, db database.DB, gitserverClient gitserver.Client) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + flags := featureflag.FromContext(r.Context()) + if !flags.GetBoolOr("enable-streaming-git-blame", false) { + w.WriteHeader(404) + return + } + tr, ctx := trace.New(r.Context(), "blame.Stream", "") + defer tr.Finish() + r = r.WithContext(ctx) + + if _, ok := mux.Vars(r)["Repo"]; !ok { + w.WriteHeader(http.StatusUnprocessableEntity) + return + } + + repo, commitID, err := handlerutil.GetRepoAndRev(r.Context(), logger, db, mux.Vars(r)) + if errors.HasType(err, &gitdomain.RevisionNotFoundError{}) { + w.WriteHeader(http.StatusNotFound) + return + } + if errors.HasType(err, &gitserver.RepoNotCloneableErr{}) { + if errcode.IsNotFound(err) { + w.WriteHeader(http.StatusNotFound) + return + } + w.WriteHeader(http.StatusInternalServerError) + return + } + if errcode.IsNotFound(err) || errcode.IsBlocked(err) { + w.WriteHeader(http.StatusNotFound) + return + } + if errcode.IsUnauthorized(err) { + w.WriteHeader(http.StatusUnauthorized) + return + } + + requestedPath := mux.Vars(r)["Path"] + streamWriter, err := streamhttp.NewWriter(w) + if err != nil { + tr.SetError(err) + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + // Log events to trace + streamWriter.StatHook = func(stat streamhttp.WriterStat) { + fields := []otlog.Field{ + otlog.String("streamhttp.Event", stat.Event), + otlog.Int("bytes", stat.Bytes), + otlog.Int64("duration_ms", stat.Duration.Milliseconds()), + } + if stat.Error != nil { + fields = append(fields, otlog.Error(stat.Error)) + } + tr.LogFields(fields...) + } + + requestedPath = strings.TrimPrefix(requestedPath, "/") + + hunkReader, err := gitserverClient.StreamBlameFile(r.Context(), authz.DefaultSubRepoPermsChecker, repo.Name, requestedPath, &gitserver.BlameOptions{ + NewestCommit: commitID, + }) + if err != nil { + tr.SetError(err) + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + parentsCache := map[api.CommitID][]api.CommitID{} + + for { + hunks, done, err := hunkReader.Read() + if err != nil { + tr.SetError(err) + http.Error(w, html.EscapeString(err.Error()), http.StatusInternalServerError) + return + } + if done { + streamWriter.Event("done", map[string]any{}) + return + } + + blameResponses := make([]BlameHunkResponse, 0, len(hunks)) + for _, h := range hunks { + var parents []api.CommitID + if p, ok := parentsCache[h.CommitID]; ok { + parents = p + } else { + c, err := gitserverClient.GetCommit(ctx, repo.Name, h.CommitID, gitserver.ResolveRevisionOptions{}, authz.DefaultSubRepoPermsChecker) + if err != nil { + tr.SetError(err) + http.Error(w, html.EscapeString(err.Error()), http.StatusInternalServerError) + return + } + parents = c.Parents + parentsCache[h.CommitID] = c.Parents + } + + blameResponse := BlameHunkResponse{ + StartLine: h.StartLine, + EndLine: h.EndLine, + CommitID: h.CommitID, + Author: h.Author, + Message: h.Message, + Filename: h.Filename, + Commit: BlameHunkCommitResponse{ + Parents: parents, + URL: fmt.Sprintf("%s/-/commit/%s", repo.URI, h.CommitID), + }, + } + blameResponses = append(blameResponses, blameResponse) + } + + if err := streamWriter.Event("hunk", blameResponses); err != nil { + tr.SetError(err) + http.Error(w, html.EscapeString(err.Error()), http.StatusInternalServerError) + return + } + } + } +} + +type BlameHunkResponse struct { + api.CommitID `json:"commitID"` + + StartLine int `json:"startLine"` // 1-indexed start line number + EndLine int `json:"endLine"` // 1-indexed end line number + Author gitdomain.Signature `json:"author"` + Message string `json:"message"` + Filename string `json:"filename"` + Commit BlameHunkCommitResponse `json:"commit"` +} + +type BlameHunkCommitResponse struct { + Parents []api.CommitID `json:"parents"` + URL string `json:"url"` +} diff --git a/cmd/frontend/internal/httpapi/stream_blame_test.go b/cmd/frontend/internal/httpapi/stream_blame_test.go new file mode 100644 index 000000000000..ab807754cb22 --- /dev/null +++ b/cmd/frontend/internal/httpapi/stream_blame_test.go @@ -0,0 +1,233 @@ +package httpapi + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/gorilla/mux" + "github.com/sourcegraph/log/logtest" + "github.com/sourcegraph/sourcegraph/cmd/frontend/backend" + "github.com/sourcegraph/sourcegraph/internal/api" + "github.com/sourcegraph/sourcegraph/internal/authz" + "github.com/sourcegraph/sourcegraph/internal/database" + "github.com/sourcegraph/sourcegraph/internal/featureflag" + "github.com/sourcegraph/sourcegraph/internal/gitserver" + "github.com/sourcegraph/sourcegraph/internal/gitserver/gitdomain" + "github.com/sourcegraph/sourcegraph/internal/types" + "github.com/sourcegraph/sourcegraph/lib/errors" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func setupMockGSClient(t *testing.T, wantRev api.CommitID, returnErr error, hunks []*gitserver.Hunk) gitserver.Client { + hunkReader := gitserver.NewMockHunkReader(hunks, returnErr) + gsClient := gitserver.NewMockClient() + gsClient.GetCommitFunc.SetDefaultHook( + func(_ context.Context, + repoName api.RepoName, + commit api.CommitID, + opts gitserver.ResolveRevisionOptions, + checker authz.SubRepoPermissionChecker, + ) (*gitdomain.Commit, error) { + return &gitdomain.Commit{ + Parents: []api.CommitID{"xxx", "yyy"}, + }, nil + }) + gsClient.StreamBlameFileFunc.SetDefaultHook( + func( + ctx context.Context, + checker authz.SubRepoPermissionChecker, + repo api.RepoName, + path string, + opts *gitserver.BlameOptions, + ) (gitserver.HunkReader, error) { + if want, got := wantRev, opts.NewestCommit; want != got { + t.Logf("want %s, got %s", want, got) + t.Fail() + } + return hunkReader, nil + }) + return gsClient +} + +func TestStreamBlame(t *testing.T) { + logger, _ := logtest.Captured(t) + + hunks := []*gitserver.Hunk{ + { + StartLine: 1, + EndLine: 2, + CommitID: api.CommitID("abcd"), + Author: gitdomain.Signature{ + Name: "Bob", + Email: "bob@internet.com", + Date: time.Now(), + }, + Message: "one", + Filename: "foo.c", + }, + { + StartLine: 2, + EndLine: 3, + CommitID: api.CommitID("ijkl"), + Author: gitdomain.Signature{ + Name: "Bob", + Email: "bob@internet.com", + Date: time.Now(), + }, + Message: "two", + Filename: "foo.c", + }, + } + + db := database.NewMockDB() + backend.Mocks.Repos.GetByName = func(ctx context.Context, name api.RepoName) (*types.Repo, error) { + if name == "github.com/bob/foo" { + return &types.Repo{Name: name}, nil + } + return nil, &database.RepoNotFoundErr{Name: name} + } + backend.Mocks.Repos.Get = func(ctx context.Context, repo api.RepoID) (*types.Repo, error) { + return &types.Repo{Name: "github.com/bob/foo"}, nil + } + backend.Mocks.Repos.ResolveRev = func(ctx context.Context, repo *types.Repo, rev string) (api.CommitID, error) { + switch rev { + case "1234": + return api.CommitID("efgh"), nil + case "": + return api.CommitID("abcd"), nil + default: + return api.CommitID(""), &gitdomain.RevisionNotFoundError{Repo: repo.Name} + } + } + + t.Cleanup(func() { + backend.Mocks.Repos = backend.MockRepos{} + }) + + ffs := featureflag.NewMemoryStore(nil, nil, map[string]bool{"enable-streaming-git-blame": true}) + ctx := featureflag.WithFlags(context.Background(), ffs) + + t.Run("NOK feature flag disabled", func(t *testing.T) { + rec := httptest.NewRecorder() + req, err := http.NewRequest(http.MethodGet, "/no-vars", nil) + require.NoError(t, err) + req = req.WithContext(context.Background()) // no feature flag there + + gsClient := setupMockGSClient(t, "abcd", nil, hunks) + handleStreamBlame(logger, db, gsClient).ServeHTTP(rec, req) + assert.Equal(t, http.StatusNotFound, rec.Code) + }) + + t.Run("NOK no mux vars", func(t *testing.T) { + rec := httptest.NewRecorder() + req, err := http.NewRequest(http.MethodGet, "/no-vars", nil) + require.NoError(t, err) + req = req.WithContext(ctx) + + gsClient := setupMockGSClient(t, "abcd", nil, hunks) + handleStreamBlame(logger, db, gsClient).ServeHTTP(rec, req) + assert.Equal(t, http.StatusUnprocessableEntity, rec.Code) + }) + + t.Run("NOK repo not found", func(t *testing.T) { + rec := httptest.NewRecorder() + req, err := http.NewRequest(http.MethodGet, "/", nil) + require.NoError(t, err) + req = req.WithContext(ctx) + + req = mux.SetURLVars(req, map[string]string{ + "Repo": "github.com/bob/bar", + "path": "foo.c", + }) + gsClient := setupMockGSClient(t, "abcd", nil, hunks) + handleStreamBlame(logger, db, gsClient).ServeHTTP(rec, req) + assert.Equal(t, http.StatusNotFound, rec.Code) + }) + + t.Run("NOK rev not found", func(t *testing.T) { + rec := httptest.NewRecorder() + req, err := http.NewRequest(http.MethodGet, "/", nil) + require.NoError(t, err) + req = req.WithContext(ctx) + + req = mux.SetURLVars(req, map[string]string{ + "Repo": "github.com/bob/foo", + "path": "foo.c", + "Rev": "@void", + }) + gsClient := setupMockGSClient(t, "abcd", nil, hunks) + handleStreamBlame(logger, db, gsClient).ServeHTTP(rec, req) + assert.Equal(t, http.StatusNotFound, rec.Code) + }) + + t.Run("OK no rev", func(t *testing.T) { + rec := httptest.NewRecorder() + req, err := http.NewRequest(http.MethodGet, "/", nil) + require.NoError(t, err) + req = req.WithContext(ctx) + + req = mux.SetURLVars(req, map[string]string{ + "Repo": "github.com/bob/foo", + "path": "foo.c", + }) + gsClient := setupMockGSClient(t, "abcd", nil, hunks) + handleStreamBlame(logger, db, gsClient).ServeHTTP(rec, req) + assert.Equal(t, http.StatusOK, rec.Code) + data := rec.Body.String() + assert.Contains(t, data, `"commitID":"abcd"`) + assert.Contains(t, data, `"commitID":"ijkl"`) + assert.Contains(t, data, `done`) + }) + + t.Run("OK rev", func(t *testing.T) { + rec := httptest.NewRecorder() + req, err := http.NewRequest(http.MethodGet, "/", nil) + require.NoError(t, err) + req = req.WithContext(ctx) + + req = mux.SetURLVars(req, map[string]string{ + "Rev": "@1234", + "Repo": "github.com/bob/foo", + "path": "foo.c", + }) + gsClient := setupMockGSClient(t, "efgh", nil, []*gitserver.Hunk{ + { + StartLine: 1, + EndLine: 2, + CommitID: api.CommitID("efgh"), + Author: gitdomain.Signature{ + Name: "Bob", + Email: "bob@internet.com", + Date: time.Now(), + }, + Message: "one", + Filename: "foo.c", + }, + }) + + handleStreamBlame(logger, db, gsClient).ServeHTTP(rec, req) + assert.Equal(t, http.StatusOK, rec.Code) + data := rec.Body.String() + assert.Contains(t, data, `"commitID":"efgh"`) + assert.Contains(t, data, `done`) + }) + + t.Run("NOK err reading hunks", func(t *testing.T) { + rec := httptest.NewRecorder() + req, err := http.NewRequest(http.MethodGet, "/", nil) + require.NoError(t, err) + req = req.WithContext(ctx) + + req = mux.SetURLVars(req, map[string]string{ + "Repo": "github.com/bob/foo", + "path": "foo.c", + }) + gsClient := setupMockGSClient(t, "abcd", errors.New("foo"), hunks) + handleStreamBlame(logger, db, gsClient).ServeHTTP(rec, req) + assert.Equal(t, http.StatusInternalServerError, rec.Code) + }) +} diff --git a/internal/gitserver/client.go b/internal/gitserver/client.go index 6473113c96ed..6fa184a9a6ca 100644 --- a/internal/gitserver/client.go +++ b/internal/gitserver/client.go @@ -25,7 +25,6 @@ import ( "github.com/opentracing/opentracing-go/log" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" - "github.com/sourcegraph/sourcegraph/internal/actor" "golang.org/x/sync/errgroup" "golang.org/x/sync/semaphore" @@ -34,6 +33,7 @@ import ( "github.com/sourcegraph/go-diff/diff" "github.com/sourcegraph/go-rendezvous" + "github.com/sourcegraph/sourcegraph/internal/actor" "github.com/sourcegraph/sourcegraph/internal/api" "github.com/sourcegraph/sourcegraph/internal/authz" "github.com/sourcegraph/sourcegraph/internal/conf" @@ -219,6 +219,10 @@ type RawBatchLogResult struct { } type BatchLogCallback func(repoCommit api.RepoCommit, gitLogResult RawBatchLogResult) error +type HunkReader interface { + Read() (hunks []*Hunk, done bool, err error) +} + type Client interface { // AddrForRepo returns the gitserver address to use for the given repo name. AddrForRepo(context.Context, api.RepoName) (string, error) @@ -234,6 +238,8 @@ type Client interface { // BlameFile returns Git blame information about a file. BlameFile(ctx context.Context, checker authz.SubRepoPermissionChecker, repo api.RepoName, path string, opt *BlameOptions) ([]*Hunk, error) + StreamBlameFile(ctx context.Context, checker authz.SubRepoPermissionChecker, repo api.RepoName, path string, opt *BlameOptions) (HunkReader, error) + // CreateCommitFromPatch will attempt to create a commit from a patch // If possible, the error returned will be of type protocol.CreateCommitFromPatchError CreateCommitFromPatch(context.Context, protocol.CreateCommitFromPatchRequest) (string, error) diff --git a/internal/gitserver/commands.go b/internal/gitserver/commands.go index ef919ea5e9f7..ff90b862b611 100644 --- a/internal/gitserver/commands.go +++ b/internal/gitserver/commands.go @@ -687,6 +687,59 @@ type Hunk struct { Filename string } +// StreamBlameFile returns Git blame information about a file. +func (c *clientImplementor) StreamBlameFile(ctx context.Context, checker authz.SubRepoPermissionChecker, repo api.RepoName, path string, opt *BlameOptions) (HunkReader, error) { + span, ctx := ot.StartSpanFromContext(ctx, "Git: StreamBlameFile") + span.SetTag("repo", repo) + span.SetTag("path", path) + span.SetTag("opt", opt) + defer span.Finish() + + return streamBlameFileCmd(ctx, checker, repo, path, opt, c.gitserverGitCommandFunc(repo)) +} + +type errUnauthorizedStreamBlame struct { + Repo api.RepoName +} + +func (e errUnauthorizedStreamBlame) Unauthorized() bool { + return true +} + +func (e errUnauthorizedStreamBlame) Error() string { + return fmt.Sprintf("not authorized (name=%s)", e.Repo) +} + +func streamBlameFileCmd(ctx context.Context, checker authz.SubRepoPermissionChecker, repo api.RepoName, path string, opt *BlameOptions, command gitCommandFunc) (HunkReader, error) { + a := actor.FromContext(ctx) + hasAccess, err := authz.FilterActorPath(ctx, checker, a, repo, path) + if err != nil { + return nil, err + } + if !hasAccess { + return nil, errUnauthorizedStreamBlame{Repo: repo} + } + if opt == nil { + opt = &BlameOptions{} + } + if err := checkSpecArgSafety(string(opt.NewestCommit)); err != nil { + return nil, err + } + + args := []string{"blame", "-w", "--porcelain", "--incremental"} + if opt.StartLine != 0 || opt.EndLine != 0 { + args = append(args, fmt.Sprintf("-L%d,%d", opt.StartLine, opt.EndLine)) + } + args = append(args, string(opt.NewestCommit), "--", filepath.ToSlash(path)) + + rc, err := command(args).StdoutReader(ctx) + if err != nil { + return nil, errors.WithMessage(err, fmt.Sprintf("git command %v failed", args)) + } + + return newBlameHunkReader(ctx, rc), nil +} + // BlameFile returns Git blame information about a file. func (c *clientImplementor) BlameFile(ctx context.Context, checker authz.SubRepoPermissionChecker, repo api.RepoName, path string, opt *BlameOptions) ([]*Hunk, error) { span, ctx := ot.StartSpanFromContext(ctx, "Git: BlameFile") @@ -723,10 +776,15 @@ func blameFileCmd(ctx context.Context, command gitCommandFunc, path string, opt return nil, nil } + return parseGitBlameOutput(string(out)) +} + +// parseGitBlameOutput parses the output of `git blame -w --porcelain` +func parseGitBlameOutput(out string) ([]*Hunk, error) { commits := make(map[string]gitdomain.Commit) filenames := make(map[string]string) hunks := make([]*Hunk, 0) - remainingLines := strings.Split(string(out[:len(out)-1]), "\n") + remainingLines := strings.Split(out[:len(out)-1], "\n") byteOffset := 0 for len(remainingLines) > 0 { // Consume hunk diff --git a/internal/gitserver/commands_test.go b/internal/gitserver/commands_test.go index 394608fa27e8..8c9ef4c97cf5 100644 --- a/internal/gitserver/commands_test.go +++ b/internal/gitserver/commands_test.go @@ -25,6 +25,7 @@ import ( "github.com/sourcegraph/sourcegraph/internal/api" "github.com/sourcegraph/sourcegraph/internal/authz" "github.com/sourcegraph/sourcegraph/internal/database" + "github.com/sourcegraph/sourcegraph/internal/errcode" "github.com/sourcegraph/sourcegraph/internal/gitserver/gitdomain" "github.com/sourcegraph/sourcegraph/internal/types" "github.com/sourcegraph/sourcegraph/lib/errors" @@ -2932,3 +2933,381 @@ size 12 t.Fatalf("unexpected LFS pointer (-want, +got):\n%s", d) } } + +// testGitBlameOutput is produced by running +// +// git blame -w --porcelain release.sh +// +// `sourcegraph/src-cli` +const testGitBlameOutput = `3f61310114082d6179c23f75950b88d1842fe2de 1 1 4 +author Thorsten Ball +author-mail +author-time 1592827635 +author-tz +0200 +committer GitHub +committer-mail +committer-time 1592827635 +committer-tz +0200 +summary Check that $VERSION is in MAJOR.MINOR.PATCH format in release.sh (#227) +previous ec809e79094cbcd05825446ee14c6d072466a0b7 release.sh +filename release.sh + #!/usr/bin/env bash +3f61310114082d6179c23f75950b88d1842fe2de 2 2 + +3f61310114082d6179c23f75950b88d1842fe2de 3 3 + set -euf -o pipefail +3f61310114082d6179c23f75950b88d1842fe2de 4 4 + +fbb98e0b7ff0752798463d9f49d922858a4188f6 5 5 10 +author Adam Harvey +author-mail +author-time 1602630694 +author-tz -0700 +committer GitHub +committer-mail +committer-time 1602630694 +committer-tz -0700 +summary release: add a prompt about DEVELOPMENT.md (#349) +previous 18f59760f4260518c29f0f07056245ed5d1d0f08 release.sh +filename release.sh + read -p 'Have you read DEVELOPMENT.md? [y/N] ' -n 1 -r +fbb98e0b7ff0752798463d9f49d922858a4188f6 6 6 + echo +fbb98e0b7ff0752798463d9f49d922858a4188f6 7 7 + case "$REPLY" in +fbb98e0b7ff0752798463d9f49d922858a4188f6 8 8 + Y | y) ;; +fbb98e0b7ff0752798463d9f49d922858a4188f6 9 9 + *) +fbb98e0b7ff0752798463d9f49d922858a4188f6 10 10 + echo 'Please read the Releasing section of DEVELOPMENT.md before running this script.' +fbb98e0b7ff0752798463d9f49d922858a4188f6 11 11 + exit 1 +fbb98e0b7ff0752798463d9f49d922858a4188f6 12 12 + ;; +fbb98e0b7ff0752798463d9f49d922858a4188f6 13 13 + esac +fbb98e0b7ff0752798463d9f49d922858a4188f6 14 14 + +8a75c6f8b4cbe2a2f3c8be0f2c50bc766499f498 15 15 1 +author Adam Harvey +author-mail +author-time 1660860583 +author-tz -0700 +committer GitHub +committer-mail +committer-time 1660860583 +committer-tz +0000 +summary release.sh: allow -rc.X suffixes (#829) +previous e6e03e850770dd0ba745f0fa4b23127e9d72ad30 release.sh +filename release.sh + if ! echo "$VERSION" | grep -Eq '^[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)?$'; then +3f61310114082d6179c23f75950b88d1842fe2de 6 16 4 + echo "\$VERSION is not in MAJOR.MINOR.PATCH format" +3f61310114082d6179c23f75950b88d1842fe2de 7 17 + exit 1 +3f61310114082d6179c23f75950b88d1842fe2de 8 18 + fi +3f61310114082d6179c23f75950b88d1842fe2de 9 19 + +67b7b725a7ff913da520b997d71c840230351e30 10 20 1 +author Thorsten Ball +author-mail +author-time 1600334460 +author-tz +0200 +committer Thorsten Ball +committer-mail +committer-time 1600334460 +committer-tz +0200 +summary Fix goreleaser GitHub action setup and release script +previous 6e931cc9745502184ce32d48b01f9a8706a4dfe8 release.sh +filename release.sh + # Create a new tag and push it, this will trigger the goreleaser workflow in .github/workflows/goreleaser.yml +3f61310114082d6179c23f75950b88d1842fe2de 10 21 1 + git tag "${VERSION}" -a -m "release v${VERSION}" +67b7b725a7ff913da520b997d71c840230351e30 12 22 2 + # We use --atomic so that we push the tag and the commit if the commit was or wasn't pushed before +67b7b725a7ff913da520b997d71c840230351e30 13 23 + git push --atomic origin main "${VERSION}" +` + +var testGitBlameOutputIncremental = `8a75c6f8b4cbe2a2f3c8be0f2c50bc766499f498 15 15 1 +author Adam Harvey +author-mail +author-time 1660860583 +author-tz -0700 +committer GitHub +committer-mail +committer-time 1660860583 +committer-tz +0000 +summary release.sh: allow -rc.X suffixes (#829) +previous e6e03e850770dd0ba745f0fa4b23127e9d72ad30 release.sh +filename release.sh +fbb98e0b7ff0752798463d9f49d922858a4188f6 5 5 10 +author Adam Harvey +author-mail +author-time 1602630694 +author-tz -0700 +committer GitHub +committer-mail +committer-time 1602630694 +committer-tz -0700 +summary release: add a prompt about DEVELOPMENT.md (#349) +previous 18f59760f4260518c29f0f07056245ed5d1d0f08 release.sh +filename release.sh +67b7b725a7ff913da520b997d71c840230351e30 10 20 1 +author Thorsten Ball +author-mail +author-time 1600334460 +author-tz +0200 +committer Thorsten Ball +committer-mail +committer-time 1600334460 +committer-tz +0200 +summary Fix goreleaser GitHub action setup and release script +previous 6e931cc9745502184ce32d48b01f9a8706a4dfe8 release.sh +filename release.sh +67b7b725a7ff913da520b997d71c840230351e30 12 22 2 +previous 6e931cc9745502184ce32d48b01f9a8706a4dfe8 release.sh +filename release.sh +3f61310114082d6179c23f75950b88d1842fe2de 1 1 4 +author Thorsten Ball +author-mail +author-time 1592827635 +author-tz +0200 +committer GitHub +committer-mail +committer-time 1592827635 +committer-tz +0200 +summary Check that $VERSION is in MAJOR.MINOR.PATCH format in release.sh (#227) +previous ec809e79094cbcd05825446ee14c6d072466a0b7 release.sh +filename release.sh +3f61310114082d6179c23f75950b88d1842fe2de 6 16 4 +previous ec809e79094cbcd05825446ee14c6d072466a0b7 release.sh +filename release.sh +3f61310114082d6179c23f75950b88d1842fe2de 10 21 1 +previous ec809e79094cbcd05825446ee14c6d072466a0b7 release.sh +filename release.sh +` + +// This test-data includes the boundary keyword, which is not present in the previous one. +var testGitBlameOutputIncremental2 = `bbca6551549492486ca1b0f8dee45553dd6aa6d7 16 16 1 +author French Ben +author-mail +author-time 1517407262 +author-tz +0100 +committer French Ben +committer-mail +committer-time 1517407262 +committer-tz +0100 +summary Update error output to be clean +previous b7773ae218740a7be65057fc60b366a49b538a44 format.go +filename format.go +bbca6551549492486ca1b0f8dee45553dd6aa6d7 25 25 2 +previous b7773ae218740a7be65057fc60b366a49b538a44 format.go +filename format.go +2c87fda17de1def6ea288141b8e7600b888e535b 15 15 1 +author David Tolnay +author-mail +author-time 1478451741 +author-tz -0800 +committer David Tolnay +committer-mail +committer-time 1478451741 +committer-tz -0800 +summary Singular message for a single error +previous 8c5f0ad9360406a3807ce7de6bc73269a91a6e51 format.go +filename format.go +2c87fda17de1def6ea288141b8e7600b888e535b 17 17 2 +previous 8c5f0ad9360406a3807ce7de6bc73269a91a6e51 format.go +filename format.go +31fee45604949934710ada68f0b307c4726fb4e8 1 1 14 +author Mitchell Hashimoto +author-mail +author-time 1418673320 +author-tz -0800 +committer Mitchell Hashimoto +committer-mail +committer-time 1418673320 +committer-tz -0800 +summary Initial commit +boundary +filename format.go +31fee45604949934710ada68f0b307c4726fb4e8 15 19 6 +filename format.go +31fee45604949934710ada68f0b307c4726fb4e8 23 27 1 +filename format.go +` + +var testGitBlameOutputHunks = []*Hunk{ + { + StartLine: 1, EndLine: 5, StartByte: 0, EndByte: 43, + CommitID: "3f61310114082d6179c23f75950b88d1842fe2de", + Author: gitdomain.Signature{ + Name: "Thorsten Ball", + Email: "mrnugget@gmail.com", + Date: MustParseTime(time.RFC3339, "2020-06-22T12:07:15Z"), + }, + Message: "Check that $VERSION is in MAJOR.MINOR.PATCH format in release.sh (#227)", + Filename: "release.sh", + }, + { + StartLine: 5, EndLine: 15, StartByte: 43, EndByte: 252, + CommitID: "fbb98e0b7ff0752798463d9f49d922858a4188f6", + Author: gitdomain.Signature{ + Name: "Adam Harvey", + Email: "aharvey@sourcegraph.com", + Date: MustParseTime(time.RFC3339, "2020-10-13T23:11:34Z"), + }, + Message: "release: add a prompt about DEVELOPMENT.md (#349)", + Filename: "release.sh", + }, + { + StartLine: 15, EndLine: 16, StartByte: 252, EndByte: 331, + CommitID: "8a75c6f8b4cbe2a2f3c8be0f2c50bc766499f498", + Author: gitdomain.Signature{ + Name: "Adam Harvey", + Email: "adam@adamharvey.name", + Date: MustParseTime(time.RFC3339, "2022-08-18T22:09:43Z"), + }, + Message: "release.sh: allow -rc.X suffixes (#829)", + Filename: "release.sh", + }, + { + StartLine: 16, EndLine: 20, StartByte: 331, EndByte: 398, + CommitID: "3f61310114082d6179c23f75950b88d1842fe2de", + Author: gitdomain.Signature{ + Name: "Thorsten Ball", + Email: "mrnugget@gmail.com", + Date: MustParseTime(time.RFC3339, "2020-06-22T12:07:15Z"), + }, + Message: "Check that $VERSION is in MAJOR.MINOR.PATCH format in release.sh (#227)", + Filename: "release.sh", + }, + { + StartLine: 20, EndLine: 21, StartByte: 398, EndByte: 508, + CommitID: "67b7b725a7ff913da520b997d71c840230351e30", + Author: gitdomain.Signature{ + Name: "Thorsten Ball", + Email: "mrnugget@gmail.com", + Date: MustParseTime(time.RFC3339, "2020-09-17T09:21:00Z"), + }, + Message: "Fix goreleaser GitHub action setup and release script", + Filename: "release.sh", + }, + { + StartLine: 21, EndLine: 22, StartByte: 508, EndByte: 557, + CommitID: "3f61310114082d6179c23f75950b88d1842fe2de", + Author: gitdomain.Signature{ + Name: "Thorsten Ball", + Email: "mrnugget@gmail.com", + Date: MustParseTime(time.RFC3339, "2020-06-22T12:07:15Z"), + }, + Message: "Check that $VERSION is in MAJOR.MINOR.PATCH format in release.sh (#227)", + Filename: "release.sh", + }, + { + StartLine: 22, EndLine: 24, StartByte: 557, EndByte: 699, + CommitID: "67b7b725a7ff913da520b997d71c840230351e30", + Author: gitdomain.Signature{ + Name: "Thorsten Ball", + Email: "mrnugget@gmail.com", + Date: MustParseTime(time.RFC3339, "2020-09-17T09:21:00Z"), + }, + Message: "Fix goreleaser GitHub action setup and release script", + Filename: "release.sh", + }, +} + +func TestParseGitBlameOutput(t *testing.T) { + hunks, err := parseGitBlameOutput(testGitBlameOutput) + if err != nil { + t.Fatalf("parseGitBlameOutput failed: %s", err) + } + + if d := cmp.Diff(testGitBlameOutputHunks, hunks); d != "" { + t.Fatalf("unexpected hunks (-want, +got):\n%s", d) + } +} + +func TestStreamBlameFile(t *testing.T) { + t.Run("NOK unauthorized", func(t *testing.T) { + ctx := actor.WithActor(context.Background(), &actor.Actor{ + UID: 1, + }) + checker := authz.NewMockSubRepoPermissionChecker() + checker.EnabledFunc.SetDefaultHook(func() bool { + return true + }) + // User doesn't have access to this file + checker.PermissionsFunc.SetDefaultHook(func(ctx context.Context, i int32, content authz.RepoContent) (authz.Perms, error) { + return authz.None, nil + }) + hr, err := streamBlameFileCmd(ctx, checker, api.RepoName("foobar"), "README.md", nil, func(_ []string) GitCommand { return nil }) + if hr != nil { + t.Fatalf("expected nil HunkReader") + } + if err == nil { + t.Fatalf("expected an error to be returned") + } + if !errcode.IsUnauthorized(err) { + t.Fatalf("expected err to be an authorization error, got %v", err) + } + }) +} + +func TestBlameHunkReader(t *testing.T) { + t.Run("OK matching hunks", func(t *testing.T) { + rc := io.NopCloser(strings.NewReader(testGitBlameOutputIncremental)) + reader := newBlameHunkReader(context.Background(), rc) + + hunks := []*Hunk{} + for { + hunk, done, err := reader.Read() + if err != nil { + t.Fatalf("blameHunkReader.Read failed: %s", err) + } + if done { + break + } + hunks = append(hunks, hunk...) + } + + sortFn := func(x []*Hunk) func(i, j int) bool { + return func(i, j int) bool { + return x[i].Author.Date.After(x[j].Author.Date) + } + } + + // We're not giving back bytes, as the output of --incremental only gives back annotations. + expectedHunks := make([]*Hunk, 0, len(testGitBlameOutputHunks)) + for _, h := range testGitBlameOutputHunks { + dup := *h + dup.EndByte = 0 + dup.StartByte = 0 + expectedHunks = append(expectedHunks, &dup) + } + + // Sort expected hunks by the most recent first, as --incremental does. + sort.SliceStable(expectedHunks, sortFn(expectedHunks)) + + if d := cmp.Diff(expectedHunks, hunks); d != "" { + t.Fatalf("unexpected hunks (-want, +got):\n%s", d) + } + }) + + t.Run("OK parsing hunks", func(t *testing.T) { + rc := io.NopCloser(strings.NewReader(testGitBlameOutputIncremental2)) + reader := newBlameHunkReader(context.Background(), rc) + + for { + _, done, err := reader.Read() + if err != nil { + t.Fatalf("blameHunkReader.Read failed: %s", err) + } + if done { + break + } + } + }) +} diff --git a/internal/gitserver/mocks_temp.go b/internal/gitserver/mocks_temp.go index afcb87b798f2..b1f9309421d9 100644 --- a/internal/gitserver/mocks_temp.go +++ b/internal/gitserver/mocks_temp.go @@ -185,6 +185,9 @@ type MockClient struct { // StatFunc is an instance of a mock function object controlling the // behavior of the method Stat. StatFunc *ClientStatFunc + // StreamBlameFileFunc is an instance of a mock function object + // controlling the behavior of the method StreamBlameFile. + StreamBlameFileFunc *ClientStreamBlameFileFunc } // NewMockClient creates a new mock of the Client interface. All methods @@ -456,6 +459,11 @@ func NewMockClient() *MockClient { return }, }, + StreamBlameFileFunc: &ClientStreamBlameFileFunc{ + defaultHook: func(context.Context, authz.SubRepoPermissionChecker, api.RepoName, string, *BlameOptions) (r0 HunkReader, r1 error) { + return + }, + }, } } @@ -728,6 +736,11 @@ func NewStrictMockClient() *MockClient { panic("unexpected invocation of MockClient.Stat") }, }, + StreamBlameFileFunc: &ClientStreamBlameFileFunc{ + defaultHook: func(context.Context, authz.SubRepoPermissionChecker, api.RepoName, string, *BlameOptions) (HunkReader, error) { + panic("unexpected invocation of MockClient.StreamBlameFile") + }, + }, } } @@ -894,6 +907,9 @@ func NewMockClientFrom(i Client) *MockClient { StatFunc: &ClientStatFunc{ defaultHook: i.Stat, }, + StreamBlameFileFunc: &ClientStreamBlameFileFunc{ + defaultHook: i.StreamBlameFile, + }, } } @@ -6871,3 +6887,120 @@ func (c ClientStatFuncCall) Args() []interface{} { func (c ClientStatFuncCall) Results() []interface{} { return []interface{}{c.Result0, c.Result1} } + +// ClientStreamBlameFileFunc describes the behavior when the StreamBlameFile +// method of the parent MockClient instance is invoked. +type ClientStreamBlameFileFunc struct { + defaultHook func(context.Context, authz.SubRepoPermissionChecker, api.RepoName, string, *BlameOptions) (HunkReader, error) + hooks []func(context.Context, authz.SubRepoPermissionChecker, api.RepoName, string, *BlameOptions) (HunkReader, error) + history []ClientStreamBlameFileFuncCall + mutex sync.Mutex +} + +// StreamBlameFile delegates to the next hook function in the queue and +// stores the parameter and result values of this invocation. +func (m *MockClient) StreamBlameFile(v0 context.Context, v1 authz.SubRepoPermissionChecker, v2 api.RepoName, v3 string, v4 *BlameOptions) (HunkReader, error) { + r0, r1 := m.StreamBlameFileFunc.nextHook()(v0, v1, v2, v3, v4) + m.StreamBlameFileFunc.appendCall(ClientStreamBlameFileFuncCall{v0, v1, v2, v3, v4, r0, r1}) + return r0, r1 +} + +// SetDefaultHook sets function that is called when the StreamBlameFile +// method of the parent MockClient instance is invoked and the hook queue is +// empty. +func (f *ClientStreamBlameFileFunc) SetDefaultHook(hook func(context.Context, authz.SubRepoPermissionChecker, api.RepoName, string, *BlameOptions) (HunkReader, error)) { + f.defaultHook = hook +} + +// PushHook adds a function to the end of hook queue. Each invocation of the +// StreamBlameFile method of the parent MockClient instance invokes the hook +// at the front of the queue and discards it. After the queue is empty, the +// default hook function is invoked for any future action. +func (f *ClientStreamBlameFileFunc) PushHook(hook func(context.Context, authz.SubRepoPermissionChecker, api.RepoName, string, *BlameOptions) (HunkReader, error)) { + f.mutex.Lock() + f.hooks = append(f.hooks, hook) + f.mutex.Unlock() +} + +// SetDefaultReturn calls SetDefaultHook with a function that returns the +// given values. +func (f *ClientStreamBlameFileFunc) SetDefaultReturn(r0 HunkReader, r1 error) { + f.SetDefaultHook(func(context.Context, authz.SubRepoPermissionChecker, api.RepoName, string, *BlameOptions) (HunkReader, error) { + return r0, r1 + }) +} + +// PushReturn calls PushHook with a function that returns the given values. +func (f *ClientStreamBlameFileFunc) PushReturn(r0 HunkReader, r1 error) { + f.PushHook(func(context.Context, authz.SubRepoPermissionChecker, api.RepoName, string, *BlameOptions) (HunkReader, error) { + return r0, r1 + }) +} + +func (f *ClientStreamBlameFileFunc) nextHook() func(context.Context, authz.SubRepoPermissionChecker, api.RepoName, string, *BlameOptions) (HunkReader, error) { + f.mutex.Lock() + defer f.mutex.Unlock() + + if len(f.hooks) == 0 { + return f.defaultHook + } + + hook := f.hooks[0] + f.hooks = f.hooks[1:] + return hook +} + +func (f *ClientStreamBlameFileFunc) appendCall(r0 ClientStreamBlameFileFuncCall) { + f.mutex.Lock() + f.history = append(f.history, r0) + f.mutex.Unlock() +} + +// History returns a sequence of ClientStreamBlameFileFuncCall objects +// describing the invocations of this function. +func (f *ClientStreamBlameFileFunc) History() []ClientStreamBlameFileFuncCall { + f.mutex.Lock() + history := make([]ClientStreamBlameFileFuncCall, len(f.history)) + copy(history, f.history) + f.mutex.Unlock() + + return history +} + +// ClientStreamBlameFileFuncCall is an object that describes an invocation +// of method StreamBlameFile on an instance of MockClient. +type ClientStreamBlameFileFuncCall struct { + // Arg0 is the value of the 1st argument passed to this method + // invocation. + Arg0 context.Context + // Arg1 is the value of the 2nd argument passed to this method + // invocation. + Arg1 authz.SubRepoPermissionChecker + // Arg2 is the value of the 3rd argument passed to this method + // invocation. + Arg2 api.RepoName + // Arg3 is the value of the 4th argument passed to this method + // invocation. + Arg3 string + // Arg4 is the value of the 5th argument passed to this method + // invocation. + Arg4 *BlameOptions + // Result0 is the value of the 1st result returned from this method + // invocation. + Result0 HunkReader + // Result1 is the value of the 2nd result returned from this method + // invocation. + Result1 error +} + +// Args returns an interface slice containing the arguments of this +// invocation. +func (c ClientStreamBlameFileFuncCall) Args() []interface{} { + return []interface{}{c.Arg0, c.Arg1, c.Arg2, c.Arg3, c.Arg4} +} + +// Results returns an interface slice containing the results of this +// invocation. +func (c ClientStreamBlameFileFuncCall) Results() []interface{} { + return []interface{}{c.Result0, c.Result1} +} diff --git a/internal/gitserver/stream_hunks.go b/internal/gitserver/stream_hunks.go new file mode 100644 index 000000000000..7c224bbe98c9 --- /dev/null +++ b/internal/gitserver/stream_hunks.go @@ -0,0 +1,268 @@ +package gitserver + +import ( + "bufio" + "context" + "io" + "strconv" + "strings" + "time" + + "github.com/sourcegraph/sourcegraph/internal/api" + "github.com/sourcegraph/sourcegraph/lib/errors" +) + +// blameHunkReader enables to read hunks from an io.Reader. +type blameHunkReader struct { + hunks chan hunkResult +} + +func newBlameHunkReader(ctx context.Context, rc io.ReadCloser) HunkReader { + br := &blameHunkReader{ + hunks: make(chan hunkResult), + } + go br.readHunks(ctx, rc) + return br +} + +type hunkResult struct { + hunk *Hunk + err error +} + +func (br *blameHunkReader) readHunks(ctx context.Context, rc io.ReadCloser) { + newHunkParser(rc, br.hunks).parse(ctx) +} + +// Read returns a slice of hunks, along with a done boolean indicating if there is more to +// read. +func (br *blameHunkReader) Read() ([]*Hunk, bool, error) { + res, ok := <-br.hunks + if !ok { + return nil, true, nil + } + if res.err != nil { + return nil, false, res.err + } else { + return []*Hunk{res.hunk}, false, nil + } +} + +type hunkParser struct { + rc io.ReadCloser + sc *bufio.Scanner + hunksCh chan hunkResult + + // commits stores previously seen commits, so new hunks + // whose annotations are abbreviated by git can still be + // filled by the correct data even if the hunk entry doesn't + // repeat them. + commits map[string]*Hunk +} + +func newHunkParser(rc io.ReadCloser, hunksCh chan hunkResult) hunkParser { + return hunkParser{ + rc: rc, + hunksCh: hunksCh, + + sc: bufio.NewScanner(rc), + commits: make(map[string]*Hunk), + } +} + +// parse processes the output from git blame and sends hunks over p.hunksCh +// for p.Read() to consume. If an error is encountered, it will be sent to +// p.hunksCh and will stop reading. +// +// Because we do not control when p.Read is called, we have to account for +// the context being cancelled, to avoid leaking the goroutine running p.parse. +func (p hunkParser) parse(ctx context.Context) { + defer p.rc.Close() + defer close(p.hunksCh) + + var cur *Hunk + for { + if err := ctx.Err(); err != nil { + return + } + + // Do we have more to read? + if !p.sc.Scan() { + if cur != nil { + if h, ok := p.commits[string(cur.CommitID)]; ok { + cur.CommitID = h.CommitID + cur.Author = h.Author + cur.Message = h.Message + } + // If we have an ongoing entry, send it. + select { + case p.hunksCh <- hunkResult{hunk: cur}: + case <-ctx.Done(): + return + } + } + break + } + + // Read line from git blame, in porcelain format + annotation, fields := p.scanLine() + + // On the first read, we have no hunk and the first thing we read is an entry. + if cur == nil { + var err error + cur, err = parseEntry(annotation, fields) + if err != nil { + select { + case p.hunksCh <- hunkResult{err: err}: + return + case <-ctx.Done(): + return + } + } + continue + } + + // After that, we're either reading extras, or a new entry. + ok, err := parseExtra(cur, annotation, fields) + if err != nil { + select { + case p.hunksCh <- hunkResult{err: err}: + return + case <-ctx.Done(): + return + } + } + // If we've finished reading extras, we're looking at a new entry. + if !ok { + if h, ok := p.commits[string(cur.CommitID)]; ok { + cur.CommitID = h.CommitID + cur.Author = h.Author + cur.Message = h.Message + } else { + p.commits[string(cur.CommitID)] = cur + } + + select { + case p.hunksCh <- hunkResult{hunk: cur}: + case <-ctx.Done(): + return + } + + cur, err = parseEntry(annotation, fields) + if err != nil { + select { + case p.hunksCh <- hunkResult{err: err}: + return + case <-ctx.Done(): + return + } + } + } + } + + // If there is an error from the scanner, send it back. + if err := p.sc.Err(); err != nil { + select { + case p.hunksCh <- hunkResult{err: err}: + return + case <-ctx.Done(): + return + } + } +} + +// parseEntry turns a `67b7b725a7ff913da520b997d71c840230351e30 10 20 1` line from +// git blame into a hunk. +func parseEntry(rev string, content string) (*Hunk, error) { + fields := strings.Split(content, " ") + if len(fields) != 3 { + return nil, errors.Errorf("Expected at least 4 parts to hunkHeader, but got: '%s %s'", rev, content) + } + + resultLine, err := strconv.Atoi(fields[1]) + if err != nil { + return nil, err + } + numLines, _ := strconv.Atoi(fields[2]) + if err != nil { + return nil, err + } + + return &Hunk{ + CommitID: api.CommitID(rev), + StartLine: resultLine, + EndLine: resultLine + numLines, + }, nil +} + +// parseExtra updates a hunk with data parsed from the other annotations such as `author ...`, +// `summary ...`. +func parseExtra(hunk *Hunk, annotation string, content string) (ok bool, err error) { + ok = true + switch annotation { + case "author": + hunk.Author.Name = content + case "author-mail": + if len(content) >= 2 && content[0] == '<' && content[len(content)-1] == '>' { + hunk.Author.Email = content[1 : len(content)-1] + } + case "author-time": + var t int64 + t, err = strconv.ParseInt(content, 10, 64) + hunk.Author.Date = time.Unix(t, 0).UTC() + case "author-tz": + // do nothing + case "committer", "committer-mail", "committer-tz", "committer-time": + case "summary": + hunk.Message = content + case "filename": + hunk.Filename = content + case "previous": + case "boundary": + default: + // If it doesn't look like an entry, it's probably an unhandled git blame + // annotation. + if len(annotation) != 40 && len(strings.Split(content, " ")) != 3 { + err = errors.Newf("unhandled git blame annotation: %s") + } + ok = false + } + return +} + +// scanLine reads a line from the scanner and returns the annotation along +// with the content, if any. +func (p hunkParser) scanLine() (string, string) { + line := p.sc.Text() + annotation, content, found := strings.Cut(line, " ") + if found { + return annotation, content + } + return line, "" +} + +type mockHunkReader struct { + hunks []*Hunk + err error + + idx int +} + +func NewMockHunkReader(hunks []*Hunk, err error) HunkReader { + return &mockHunkReader{ + hunks: hunks, + err: err, + } +} + +func (mh *mockHunkReader) Read() ([]*Hunk, bool, error) { + if mh.err != nil { + return nil, false, mh.err + } + if mh.idx < len(mh.hunks) { + idx := mh.idx + mh.idx++ + return []*Hunk{mh.hunks[idx]}, false, nil + } + return nil, true, nil +}