Skip to content

Commit

Permalink
Retry commit loading if data is missing (#78)
Browse files Browse the repository at this point in the history
GitHub seems to be missing the head commit or the pushed date for the
head commit on when making API calls immediately after a PR is created
or updated. Later evaluations return the expected information, so if
we're missing the required data, try reloading after a few seconds.
  • Loading branch information
bluekeyes committed May 2, 2019
1 parent 0452a46 commit b4b6b6f
Show file tree
Hide file tree
Showing 3 changed files with 315 additions and 62 deletions.
154 changes: 97 additions & 57 deletions pull/github.go
Expand Up @@ -22,7 +22,6 @@ import (

"github.com/google/go-github/github"
"github.com/pkg/errors"
"github.com/rs/zerolog"
"github.com/shurcooL/githubv4"
)

Expand All @@ -36,6 +35,11 @@ const (
MaxPullRequestCommits = 250
)

var (
commitLoadMaxAttempts = 3
commitLoadBaseDelay = time.Second
)

// Locator identifies a pull request and optionally contains a full or partial
// pull request object.
type Locator struct {
Expand Down Expand Up @@ -221,12 +225,16 @@ func (ghc *GitHubContext) ChangedFiles() ([]*File, error) {

func (ghc *GitHubContext) Commits() ([]*Commit, error) {
if ghc.commits == nil {
if err := ghc.loadPagedData(); err != nil {
commits, err := ghc.loadCommits()
if err != nil {
return nil, err
}
}
if len(ghc.commits) >= MaxPullRequestCommits {
return nil, errors.Errorf("too many commits in pull request, maximum is %d", MaxPullRequestCommits)
if len(commits) >= MaxPullRequestCommits {
return nil, errors.Errorf("too many commits in pull request, maximum is %d", MaxPullRequestCommits)
}

backfillPushedAt(commits, ghc.pr.HeadRefOID)
ghc.commits = commits
}
return ghc.commits, nil
}
Expand All @@ -250,15 +258,10 @@ func (ghc *GitHubContext) Reviews() ([]*Review, error) {
}

func (ghc *GitHubContext) loadPagedData() error {
// this is a minor optimization: make max(c,m,r) requests instead of c+m+r
// this is a minor optimization: make max(c,r) requests instead of c+r
var q struct {
Repository struct {
PullRequest struct {
Commits struct {
PageInfo v4PageInfo
Nodes []v4PullRequestCommit
} `graphql:"commits(first: 100, after: $commitCursor)"`

Comments struct {
PageInfo v4PageInfo
Nodes []v4IssueComment
Expand All @@ -276,12 +279,10 @@ func (ghc *GitHubContext) loadPagedData() error {
"name": githubv4.String(ghc.repo),
"number": githubv4.Int(ghc.number),

"commitCursor": (*githubv4.String)(nil),
"commentCursor": (*githubv4.String)(nil),
"reviewCursor": (*githubv4.String)(nil),
}

commits := []*Commit{}
comments := []*Comment{}
reviews := []*Review{}
for {
Expand All @@ -290,13 +291,6 @@ func (ghc *GitHubContext) loadPagedData() error {
return errors.Wrap(err, "failed to load pull request data")
}

for _, c := range q.Repository.PullRequest.Commits.Nodes {
commits = append(commits, c.Commit.ToCommit())
}
if !q.Repository.PullRequest.Commits.PageInfo.UpdateCursor(qvars, "commitCursor") {
complete++
}

for _, c := range q.Repository.PullRequest.Comments.Nodes {
comments = append(comments, c.ToComment())
}
Expand All @@ -311,41 +305,100 @@ func (ghc *GitHubContext) loadPagedData() error {
complete++
}

if complete == 3 {
if complete == 2 {
break
}
}

if ghc.pr.IsCrossRepository {
// if the commits are from a fork, the pushed date is not set in the
// query result above; this is a GitHub limitation as of 2019-04-25,
// but their support team has suggested it will be fixed in the future
if err := ghc.loadPushedAt(commits); err != nil {
return err
ghc.comments = comments
ghc.reviews = reviews
return nil
}

func (ghc *GitHubContext) loadCommits() ([]*Commit, error) {
// github does not always return the latest commit information for a PR
// immediately after it was updated; if we're missing data, try again
attempts := 0
for {
rawCommits, err := ghc.loadRawCommits()
if err != nil {
return nil, err
}

var head *Commit
commits := make([]*Commit, 0, len(rawCommits))

for _, r := range rawCommits {
c := r.Commit.ToCommit()
if c.SHA == ghc.pr.HeadRefOID {
head = c
}
commits = append(commits, c)
}

if head != nil {
// as of 2019-05-01, the GitHub API does return pushed date for
// commits from forks, so we must load that separately
if ghc.pr.IsCrossRepository && head.PushedAt == nil {
if err := ghc.loadPushedAt(commits); err != nil {
return nil, err
}
}
if head.PushedAt != nil {
return commits, nil
}
}

attempts++
if attempts >= commitLoadMaxAttempts {
msg := "missing"
if head != nil {
msg += " pushed date"
}
return nil, errors.Errorf("head commit %.10s is %s; this is probably a bug", ghc.pr.HeadRefOID, msg)
}

time.Sleep(time.Duration(attempts) * commitLoadBaseDelay)
}
}

func (ghc *GitHubContext) loadRawCommits() ([]*v4PullRequestCommit, error) {
var q struct {
Repository struct {
PullRequest struct {
Commits struct {
PageInfo v4PageInfo
Nodes []*v4PullRequestCommit
} `graphql:"commits(first: 100, after: $cursor)"`
} `graphql:"pullRequest(number: $number)"`
} `graphql:"repository(owner: $owner, name: $name)"`
}
if err := backfillPushedAt(commits, ghc.pr.HeadRefOID); err != nil {
return err
qvars := map[string]interface{}{
"owner": githubv4.String(ghc.owner),
"name": githubv4.String(ghc.repo),
"number": githubv4.Int(ghc.number),
"cursor": (*githubv4.String)(nil),
}

ghc.commits = commits
ghc.comments = comments
ghc.reviews = reviews
return nil
commits := []*v4PullRequestCommit{}
for {
if err := ghc.v4client.Query(ghc.ctx, &q, qvars); err != nil {
return nil, errors.Wrap(err, "failed to load commits")
}
commits = append(commits, q.Repository.PullRequest.Commits.Nodes...)
if !q.Repository.PullRequest.Commits.PageInfo.UpdateCursor(qvars, "cursor") {
break
}
}
return commits, nil
}

func (ghc *GitHubContext) loadPushedAt(commits []*Commit) error {
commitsBySHA := make(map[string]*Commit)
commitsBySHA := make(map[string]*Commit, len(commits))
for _, c := range commits {
commitsBySHA[c.SHA] = c
}

if c, ok := commitsBySHA[ghc.pr.HeadRefOID]; ok && c.PushedAt != nil {
// head commit has a pushed date, so assume we don't need to load more
zerolog.Ctx(ghc.ctx).Debug().Msg("Skipping pushed date loading, a date was already present")
return nil
}

var q struct {
Repository struct {
Object struct {
Expand All @@ -372,14 +425,12 @@ func (ghc *GitHubContext) loadPushedAt(commits []*Commit) error {
if err := ghc.v4client.Query(ghc.ctx, &q, qvars); err != nil {
return errors.Wrap(err, "failed to load commit pushed dates")
}

for _, n := range q.Repository.Object.Commit.History.Nodes {
if c, ok := commitsBySHA[n.OID]; ok {
c.PushedAt = n.PushedDate
delete(commitsBySHA, n.OID)
}
}

if !q.Repository.Object.Commit.History.PageInfo.UpdateCursor(qvars, "cursor") {
break
}
Expand All @@ -391,24 +442,12 @@ func (ghc *GitHubContext) loadPushedAt(commits []*Commit) error {
return nil
}

func backfillPushedAt(commits []*Commit, headSHA string) error {
if len(commits) == 0 {
return nil
}

commitsBySHA := make(map[string]*Commit)
func backfillPushedAt(commits []*Commit, headSHA string) {
commitsBySHA := make(map[string]*Commit, len(commits))
for _, c := range commits {
commitsBySHA[c.SHA] = c
}

if head, ok := commitsBySHA[headSHA]; !ok || head.PushedAt == nil {
msg := "missing"
if ok {
msg += " a pushed date"
}
return errors.Errorf("head commit %s is %s; this is probably a bug", headSHA, msg)
}

root := headSHA
for {
c, ok := commitsBySHA[root]
Expand All @@ -424,9 +463,10 @@ func backfillPushedAt(commits []*Commit, headSHA string) error {
if firstParent.PushedAt == nil {
firstParent.PushedAt = c.PushedAt
}

delete(commitsBySHA, root)
root = firstParent.SHA
}
return nil
}

// if adding new fields to this struct, modify Locator#toV4() as well
Expand Down
47 changes: 42 additions & 5 deletions pull/github_test.go
Expand Up @@ -27,6 +27,12 @@ import (
"github.com/stretchr/testify/require"
)

func init() {
// adjust attempts/delays for faster tests
commitLoadMaxAttempts = 2
commitLoadBaseDelay = time.Millisecond
}

func TestChangedFiles(t *testing.T) {
rp := &ResponsePlayer{}
filesRule := rp.AddRule(
Expand Down Expand Up @@ -66,16 +72,13 @@ func TestCommits(t *testing.T) {
"testdata/responses/pull_commits.yml",
)

pr := defaultTestPR()
pr.Commits = github.Int(3)

ctx := makeContext(t, rp, pr)
ctx := makeContext(t, rp, nil)

commits, err := ctx.Commits()
require.NoError(t, err)

require.Len(t, commits, 3, "incorrect number of commits")
assert.Equal(t, 2, dataRule.Count, "no http request was made")
assert.Equal(t, 2, dataRule.Count, "incorrect number of http requests")

expectedTime, err := time.Parse(time.RFC3339, "2018-12-04T12:34:56Z")
assert.NoError(t, err)
Expand Down Expand Up @@ -103,6 +106,40 @@ func TestCommits(t *testing.T) {
assert.Equal(t, 2, dataRule.Count, "cached commits were not used")
}

func TestCommitsRetry(t *testing.T) {
rp := &ResponsePlayer{}
dataRule := rp.AddRule(
GraphQLNodePrefixMatcher("repository.pullRequest.commits"),
"testdata/responses/pull_commits_retry.yml",
)

ctx := makeContext(t, rp, nil)

commits, err := ctx.Commits()
require.NoError(t, err)

require.Len(t, commits, 3, "incorrect number of commits")
assert.Equal(t, 2, dataRule.Count, "incorrect number of http requests")

expectedTime, err := time.Parse(time.RFC3339, "2018-12-04T12:34:56Z")
assert.NoError(t, err)

assert.Equal(t, "a6f3f69b64eaafece5a0d854eb4af11c0d64394c", commits[0].SHA)
assert.Equal(t, "mhaypenny", commits[0].Author)
assert.Equal(t, "mhaypenny", commits[0].Committer)
assert.Equal(t, newTime(expectedTime), commits[0].PushedAt)

assert.Equal(t, "1fc89f1cedf8e3f3ce516ab75b5952295c8ea5e9", commits[1].SHA)
assert.Equal(t, "mhaypenny", commits[1].Author)
assert.Equal(t, "mhaypenny", commits[1].Committer)
assert.Equal(t, newTime(expectedTime), commits[1].PushedAt)

assert.Equal(t, "e05fcae367230ee709313dd2720da527d178ce43", commits[2].SHA)
assert.Equal(t, "ttest", commits[2].Author)
assert.Equal(t, "mhaypenny", commits[2].Committer)
assert.Equal(t, newTime(expectedTime.Add(48*time.Hour)), commits[2].PushedAt)
}

func TestReviews(t *testing.T) {
rp := &ResponsePlayer{}
dataRule := rp.AddRule(
Expand Down

0 comments on commit b4b6b6f

Please sign in to comment.