Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Retry commit loading if data is missing #78

Merged
merged 3 commits into from May 2, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
154 changes: 97 additions & 57 deletions pull/github.go
Expand Up @@ -22,7 +22,6 @@ import (

"github.com/google/go-github/github"
"github.com/pkg/errors"
"github.com/rs/zerolog"
"github.com/shurcooL/githubv4"
)

Expand All @@ -36,6 +35,11 @@ const (
MaxPullRequestCommits = 250
)

var (
commitLoadMaxAttempts = 3
commitLoadBaseDelay = time.Second
)

// Locator identifies a pull request and optionally contains a full or partial
// pull request object.
type Locator struct {
Expand Down Expand Up @@ -221,12 +225,16 @@ func (ghc *GitHubContext) ChangedFiles() ([]*File, error) {

func (ghc *GitHubContext) Commits() ([]*Commit, error) {
if ghc.commits == nil {
if err := ghc.loadPagedData(); err != nil {
commits, err := ghc.loadCommits()
if err != nil {
return nil, err
}
}
if len(ghc.commits) >= MaxPullRequestCommits {
return nil, errors.Errorf("too many commits in pull request, maximum is %d", MaxPullRequestCommits)
if len(commits) >= MaxPullRequestCommits {
return nil, errors.Errorf("too many commits in pull request, maximum is %d", MaxPullRequestCommits)
}

backfillPushedAt(commits, ghc.pr.HeadRefOID)
ghc.commits = commits
}
return ghc.commits, nil
}
Expand All @@ -250,15 +258,10 @@ func (ghc *GitHubContext) Reviews() ([]*Review, error) {
}

func (ghc *GitHubContext) loadPagedData() error {
// this is a minor optimization: make max(c,m,r) requests instead of c+m+r
// this is a minor optimization: make max(c,r) requests instead of c+r
var q struct {
Repository struct {
PullRequest struct {
Commits struct {
PageInfo v4PageInfo
Nodes []v4PullRequestCommit
} `graphql:"commits(first: 100, after: $commitCursor)"`

Comments struct {
PageInfo v4PageInfo
Nodes []v4IssueComment
Expand All @@ -276,12 +279,10 @@ func (ghc *GitHubContext) loadPagedData() error {
"name": githubv4.String(ghc.repo),
"number": githubv4.Int(ghc.number),

"commitCursor": (*githubv4.String)(nil),
"commentCursor": (*githubv4.String)(nil),
"reviewCursor": (*githubv4.String)(nil),
}

commits := []*Commit{}
comments := []*Comment{}
reviews := []*Review{}
for {
Expand All @@ -290,13 +291,6 @@ func (ghc *GitHubContext) loadPagedData() error {
return errors.Wrap(err, "failed to load pull request data")
}

for _, c := range q.Repository.PullRequest.Commits.Nodes {
commits = append(commits, c.Commit.ToCommit())
}
if !q.Repository.PullRequest.Commits.PageInfo.UpdateCursor(qvars, "commitCursor") {
complete++
}

for _, c := range q.Repository.PullRequest.Comments.Nodes {
comments = append(comments, c.ToComment())
}
Expand All @@ -311,41 +305,100 @@ func (ghc *GitHubContext) loadPagedData() error {
complete++
}

if complete == 3 {
if complete == 2 {
break
}
}

if ghc.pr.IsCrossRepository {
// if the commits are from a fork, the pushed date is not set in the
// query result above; this is a GitHub limitation as of 2019-04-25,
// but their support team has suggested it will be fixed in the future
if err := ghc.loadPushedAt(commits); err != nil {
return err
ghc.comments = comments
ghc.reviews = reviews
return nil
}

func (ghc *GitHubContext) loadCommits() ([]*Commit, error) {
// github does not always return the latest commit information for a PR
// immediately after it was updated; if we're missing data, try again
attempts := 0
for {
rawCommits, err := ghc.loadRawCommits()
if err != nil {
return nil, err
}

var head *Commit
commits := make([]*Commit, 0, len(rawCommits))

for _, r := range rawCommits {
c := r.Commit.ToCommit()
if c.SHA == ghc.pr.HeadRefOID {
head = c
}
commits = append(commits, c)
}

if head != nil {
// as of 2019-05-01, the GitHub API does return pushed date for
// commits from forks, so we must load that separately
if ghc.pr.IsCrossRepository && head.PushedAt == nil {
if err := ghc.loadPushedAt(commits); err != nil {
return nil, err
}
}
if head.PushedAt != nil {
return commits, nil
}
}

attempts++
if attempts >= commitLoadMaxAttempts {
msg := "missing"
if head != nil {
msg += " pushed date"
}
return nil, errors.Errorf("head commit %.10s is %s; this is probably a bug", ghc.pr.HeadRefOID, msg)
}

time.Sleep(time.Duration(attempts) * commitLoadBaseDelay)
}
}

func (ghc *GitHubContext) loadRawCommits() ([]*v4PullRequestCommit, error) {
var q struct {
Repository struct {
PullRequest struct {
Commits struct {
PageInfo v4PageInfo
Nodes []*v4PullRequestCommit
} `graphql:"commits(first: 100, after: $cursor)"`
} `graphql:"pullRequest(number: $number)"`
} `graphql:"repository(owner: $owner, name: $name)"`
}
if err := backfillPushedAt(commits, ghc.pr.HeadRefOID); err != nil {
return err
qvars := map[string]interface{}{
"owner": githubv4.String(ghc.owner),
"name": githubv4.String(ghc.repo),
"number": githubv4.Int(ghc.number),
"cursor": (*githubv4.String)(nil),
}

ghc.commits = commits
ghc.comments = comments
ghc.reviews = reviews
return nil
commits := []*v4PullRequestCommit{}
for {
if err := ghc.v4client.Query(ghc.ctx, &q, qvars); err != nil {
return nil, errors.Wrap(err, "failed to load commits")
}
commits = append(commits, q.Repository.PullRequest.Commits.Nodes...)
if !q.Repository.PullRequest.Commits.PageInfo.UpdateCursor(qvars, "cursor") {
break
}
}
return commits, nil
}

func (ghc *GitHubContext) loadPushedAt(commits []*Commit) error {
commitsBySHA := make(map[string]*Commit)
commitsBySHA := make(map[string]*Commit, len(commits))
for _, c := range commits {
commitsBySHA[c.SHA] = c
}

if c, ok := commitsBySHA[ghc.pr.HeadRefOID]; ok && c.PushedAt != nil {
// head commit has a pushed date, so assume we don't need to load more
zerolog.Ctx(ghc.ctx).Debug().Msg("Skipping pushed date loading, a date was already present")
return nil
}

var q struct {
Repository struct {
Object struct {
Expand All @@ -372,14 +425,12 @@ func (ghc *GitHubContext) loadPushedAt(commits []*Commit) error {
if err := ghc.v4client.Query(ghc.ctx, &q, qvars); err != nil {
return errors.Wrap(err, "failed to load commit pushed dates")
}

for _, n := range q.Repository.Object.Commit.History.Nodes {
if c, ok := commitsBySHA[n.OID]; ok {
c.PushedAt = n.PushedDate
delete(commitsBySHA, n.OID)
}
}

if !q.Repository.Object.Commit.History.PageInfo.UpdateCursor(qvars, "cursor") {
break
}
Expand All @@ -391,24 +442,12 @@ func (ghc *GitHubContext) loadPushedAt(commits []*Commit) error {
return nil
}

func backfillPushedAt(commits []*Commit, headSHA string) error {
if len(commits) == 0 {
return nil
}

commitsBySHA := make(map[string]*Commit)
func backfillPushedAt(commits []*Commit, headSHA string) {
commitsBySHA := make(map[string]*Commit, len(commits))
for _, c := range commits {
commitsBySHA[c.SHA] = c
}

if head, ok := commitsBySHA[headSHA]; !ok || head.PushedAt == nil {
msg := "missing"
if ok {
msg += " a pushed date"
}
return errors.Errorf("head commit %s is %s; this is probably a bug", headSHA, msg)
}

root := headSHA
for {
c, ok := commitsBySHA[root]
Expand All @@ -424,9 +463,10 @@ func backfillPushedAt(commits []*Commit, headSHA string) error {
if firstParent.PushedAt == nil {
firstParent.PushedAt = c.PushedAt
}

delete(commitsBySHA, root)
root = firstParent.SHA
}
return nil
}

// if adding new fields to this struct, modify Locator#toV4() as well
Expand Down
47 changes: 42 additions & 5 deletions pull/github_test.go
Expand Up @@ -27,6 +27,12 @@ import (
"github.com/stretchr/testify/require"
)

func init() {
// adjust attempts/delays for faster tests
commitLoadMaxAttempts = 2
commitLoadBaseDelay = time.Millisecond
}

func TestChangedFiles(t *testing.T) {
rp := &ResponsePlayer{}
filesRule := rp.AddRule(
Expand Down Expand Up @@ -66,16 +72,13 @@ func TestCommits(t *testing.T) {
"testdata/responses/pull_commits.yml",
)

pr := defaultTestPR()
pr.Commits = github.Int(3)

ctx := makeContext(t, rp, pr)
ctx := makeContext(t, rp, nil)

commits, err := ctx.Commits()
require.NoError(t, err)

require.Len(t, commits, 3, "incorrect number of commits")
assert.Equal(t, 2, dataRule.Count, "no http request was made")
assert.Equal(t, 2, dataRule.Count, "incorrect number of http requests")

expectedTime, err := time.Parse(time.RFC3339, "2018-12-04T12:34:56Z")
assert.NoError(t, err)
Expand Down Expand Up @@ -103,6 +106,40 @@ func TestCommits(t *testing.T) {
assert.Equal(t, 2, dataRule.Count, "cached commits were not used")
}

func TestCommitsRetry(t *testing.T) {
rp := &ResponsePlayer{}
dataRule := rp.AddRule(
GraphQLNodePrefixMatcher("repository.pullRequest.commits"),
"testdata/responses/pull_commits_retry.yml",
)

ctx := makeContext(t, rp, nil)

commits, err := ctx.Commits()
require.NoError(t, err)

require.Len(t, commits, 3, "incorrect number of commits")
assert.Equal(t, 2, dataRule.Count, "incorrect number of http requests")

expectedTime, err := time.Parse(time.RFC3339, "2018-12-04T12:34:56Z")
assert.NoError(t, err)

assert.Equal(t, "a6f3f69b64eaafece5a0d854eb4af11c0d64394c", commits[0].SHA)
assert.Equal(t, "mhaypenny", commits[0].Author)
assert.Equal(t, "mhaypenny", commits[0].Committer)
assert.Equal(t, newTime(expectedTime), commits[0].PushedAt)

assert.Equal(t, "1fc89f1cedf8e3f3ce516ab75b5952295c8ea5e9", commits[1].SHA)
assert.Equal(t, "mhaypenny", commits[1].Author)
assert.Equal(t, "mhaypenny", commits[1].Committer)
assert.Equal(t, newTime(expectedTime), commits[1].PushedAt)

assert.Equal(t, "e05fcae367230ee709313dd2720da527d178ce43", commits[2].SHA)
assert.Equal(t, "ttest", commits[2].Author)
assert.Equal(t, "mhaypenny", commits[2].Committer)
assert.Equal(t, newTime(expectedTime.Add(48*time.Hour)), commits[2].PushedAt)
}

func TestReviews(t *testing.T) {
rp := &ResponsePlayer{}
dataRule := rp.AddRule(
Expand Down