🐛 Use leveled scoring for Code Review check (#2542)

* Ignore bot commits when calculating Code Review score * Update clients * Update scoring Signed-off-by: Raghav Kaul <raghavkaul@google.com> * Address PR comments * Test coverage * Docs * Raw results Signed-off-by: Raghav Kaul <raghavkaul@google.com> Signed-off-by: Raghav Kaul <raghavkaul@google.com>
ossf · Jan 11, 2023 · bf516e1 · bf516e1
1 parent ed9576c
commit bf516e1
Show file tree

Hide file tree

Showing 11 changed files with 232 additions and 48 deletions.
diff --git a/README.md b/README.md
@@ -439,7 +439,7 @@ Name        | Description                               | Risk Level | Token Req
 [Branch-Protection](docs/checks.md#branch-protection)           | Does the project use [Branch Protection](https://docs.github.com/en/free-pro-team@latest/github/administering-a-repository/about-protected-branches) ?                                                                                                                                                                       | High | PAT (`repo` or `repo> public_repo`), GITHUB_TOKEN    | certain settings are only supported with a maintainer PAT
 [CI-Tests](docs/checks.md#ci-tests)                             | Does the project run tests in CI, e.g. [GitHub Actions](https://docs.github.com/en/free-pro-team@latest/actions), [Prow](https://github.com/kubernetes/test-infra/tree/master/prow)?                                                                                                                                         | Low | PAT, GITHUB_TOKEN   |
 [CII-Best-Practices](docs/checks.md#cii-best-practices)         | Does the project have an [OpenSSF (formerly CII) Best Practices Badge](https://bestpractices.coreinfrastructure.org/en)?                                                                                                                                                                                                                         | Low  | PAT, GITHUB_TOKEN   |
-[Code-Review](docs/checks.md#code-review)                       | Does the project require code review before code is merged?                                                                                                                                                                                                                                                                  | High | PAT, GITHUB_TOKEN   |
+[Code-Review](docs/checks.md#code-review)                       | Does the project practice code review before code is merged?                                                                                                                                                                                                                                                                 | High | PAT, GITHUB_TOKEN   |
 [Contributors](docs/checks.md#contributors)                     | Does the project have contributors from at least two different organizations?                                                                                                                                                                                                                                                | Low | PAT, GITHUB_TOKEN   |
 [Dangerous-Workflow](docs/checks.md#dangerous-workflow)         | Does the project avoid dangerous coding patterns in GitHub Action workflows?                                                                                                                                                                                                                                                 | Critical | PAT, GITHUB_TOKEN   |
 [Dependency-Update-Tool](docs/checks.md#dependency-update-tool) | Does the project use tools to help update its dependencies?                                                                                                                                                                                                                                                                  | High | PAT, GITHUB_TOKEN   |

diff --git a/checks/code_review_test.go b/checks/code_review_test.go
@@ -186,7 +186,7 @@ func TestCodereview(t *testing.T) {
 				},
 			},
 			expected: checker.CheckResult{
-				Score: 5,
+				Score: 3,
 			},
 		},
 		{
@@ -211,7 +211,7 @@ func TestCodereview(t *testing.T) {
 				},
 			},
 			expected: checker.CheckResult{
-				Score: 5,
+				Score: 3,
 			},
 		},
 		{

diff --git a/checks/evaluation/code_review.go b/checks/evaluation/code_review.go
@@ -21,7 +21,7 @@ import (
 	sce "github.com/ossf/scorecard/v4/errors"
 )
 
-type reviewScore = int
+type reviewScore int
 
 // TODO(raghavkaul) More partial credit? E.g. approval from non-contributor, discussion liveness,
 // number of resolved comments, number of approvers (more eyes on a project).
@@ -42,18 +42,57 @@ func CodeReview(name string, dl checker.DetailLogger, r *checker.CodeReviewData)
 		return checker.CreateInconclusiveResult(name, "no commits found")
 	}
 
-	numReviewed := 0
+	foundHumanReviewActivity := false
+	foundBotReviewActivity := false
+	nUnreviewedHumanChanges := 0
+	nUnreviewedBotChanges := 0
 	for i := range r.DefaultBranchChangesets {
-		score := reviewScoreForChangeset(&r.DefaultBranchChangesets[i])
-		if score >= changesReviewed {
-			numReviewed += 1
+		cs := &r.DefaultBranchChangesets[i]
+		isReviewed := reviewScoreForChangeset(cs) >= changesReviewed
+		isBotCommit := cs.Author.IsBot
+
+		switch {
+		case isReviewed && isBotCommit:
+			foundBotReviewActivity = true
+		case isReviewed && !isBotCommit:
+			foundHumanReviewActivity = true
+		case !isReviewed && isBotCommit:
+			nUnreviewedBotChanges += 1
+		case !isReviewed && !isBotCommit:
+			nUnreviewedHumanChanges += 1
+		}
+	}
+
+	if foundBotReviewActivity && !foundHumanReviewActivity {
+		reason := fmt.Sprintf("found no human review activity in the last %v changesets", len(r.DefaultBranchChangesets))
+		return checker.CreateInconclusiveResult(name, reason)
+	}
+
+	switch {
+	case nUnreviewedBotChanges > 0 && nUnreviewedHumanChanges > 0:
+		return checker.CreateMinScoreResult(
+			name,
+			fmt.Sprintf("found unreviewed changesets (%v human %v bot)", nUnreviewedHumanChanges, nUnreviewedBotChanges),
+		)
+	case nUnreviewedBotChanges > 0:
+		return checker.CreateResultWithScore(
+			name,
+			fmt.Sprintf("all human changesets reviewed, found %v unreviewed bot changesets", nUnreviewedBotChanges),
+			7,
+		)
+	case nUnreviewedHumanChanges > 0:
+		score := 3
+		if len(r.DefaultBranchChangesets) == 1 || nUnreviewedHumanChanges > 1 {
+			score = 0
 		}
+		return checker.CreateResultWithScore(
+			name,
+			fmt.Sprintf("found %v unreviewed human changesets", nUnreviewedHumanChanges),
+			score,
+		)
 	}
-	reason := fmt.Sprintf(
-		"%v out of last %v changesets reviewed before merge", numReviewed, len(r.DefaultBranchChangesets),
-	)
 
-	return checker.CreateProportionalScoreResult(name, reason, numReviewed, len(r.DefaultBranchChangesets))
+	return checker.CreateMaxScoreResult(name, "all changesets reviewed")
 }
 
 func reviewScoreForChangeset(changeset *checker.Changeset) (score reviewScore) {

diff --git a/checks/evaluation/code_review_test.go b/checks/evaluation/code_review_test.go
@@ -55,10 +55,63 @@ func TestCodeReview(t *testing.T) {
 			rawData: &checker.CodeReviewData{
 				DefaultBranchChangesets: []checker.Changeset{
 					{
-						Commits: []clients.Commit{{SHA: "1"}},
+						Commits: []clients.Commit{{SHA: "a"}},
 					},
 					{
-						Commits: []clients.Commit{{SHA: "1"}},
+						Commits: []clients.Commit{{SHA: "a"}},
+					},
+				},
+			},
+		},
+		{
+			name: "Unreviewed human and bot changes",
+			expected: scut.TestReturn{
+				Score: checker.MinResultScore,
+			},
+			rawData: &checker.CodeReviewData{
+				DefaultBranchChangesets: []checker.Changeset{
+					{
+						Commits: []clients.Commit{{SHA: "a", Committer: clients.User{IsBot: true}}},
+					},
+					{
+						Commits: []clients.Commit{{SHA: "b"}},
+					},
+				},
+			},
+		},
+		{
+			name: "all human changesets reviewed, missing review on bot changeset",
+			expected: scut.TestReturn{
+				Score: 7,
+			},
+			rawData: &checker.CodeReviewData{
+				DefaultBranchChangesets: []checker.Changeset{
+					{
+						Author:         clients.User{Login: "alice"},
+						ReviewPlatform: checker.ReviewPlatformGitHub,
+						RevisionID:     "1",
+						Reviews: []clients.Review{
+							{
+								Author: &clients.User{},
+								State:  "APPROVED",
+							},
+						},
+						Commits: []clients.Commit{
+							{
+								Committer: clients.User{Login: "bob"},
+								SHA:       "b",
+							},
+						},
+					},
+					{
+						Author:     clients.User{Login: "alice-the-bot[bot]", IsBot: true},
+						RevisionID: "b",
+						Commits: []clients.Commit{
+							{
+								Committer: clients.User{Login: "alice-the-bot[bot]", IsBot: true},
+								SHA:       "b",
+							},
+						},
 					},
 				},
 			},
@@ -80,10 +133,54 @@ func TestCodeReview(t *testing.T) {
 								State:  "APPROVED",
 							},
 						},
+						Commits: []clients.Commit{
+							{
+								Committer: clients.User{Login: "bob"},
+								SHA:       "b",
+							},
+						},
+					},
+				},
+			},
+		},
+
+		{
+			name: "bot commits only",
+			expected: scut.TestReturn{
+				Score: checker.InconclusiveResultScore,
+			},
+			rawData: &checker.CodeReviewData{
+				DefaultBranchChangesets: []checker.Changeset{
+					{
+						Author:         clients.User{Login: "alice-the-bot[bot]", IsBot: true},
+						ReviewPlatform: checker.ReviewPlatformGitHub,
+						RevisionID:     "1",
+						Reviews: []clients.Review{
+							{
+								Author: &clients.User{},
+								State:  "APPROVED",
+							},
+						},
+						Commits: []clients.Commit{
+							{
+								Committer: clients.User{Login: "alice-the-bot[bot]", IsBot: true},
+								SHA:       "b",
+							},
+						},
+					},
+					{
+						RevisionID: "b",
+						Commits: []clients.Commit{
+							{
+								Committer: clients.User{Login: "alice-the-bot[bot]", IsBot: true},
+								SHA:       "b",
+							},
+						},
 					},
 				},
 			},
 		},
+
 		{
 			name: "all changesets reviewed outside github",
 			expected: scut.TestReturn{
@@ -94,7 +191,7 @@ func TestCodeReview(t *testing.T) {
 					{
 						ReviewPlatform: checker.ReviewPlatformGerrit,
 						RevisionID:     "1",
-						Commits:        []clients.Commit{{SHA: "1"}},
+						Commits:        []clients.Commit{{SHA: "a"}},
 					},
 				},
 			},

diff --git a/clients/githubrepo/graphql.go b/clients/githubrepo/graphql.go
@@ -75,7 +75,8 @@ type graphqlData struct {
 									}
 								}
 								Author struct {
-									Login githubv4.String
+									Login        githubv4.String
+									ResourcePath githubv4.String
 								}
 								Number     githubv4.Int
 								HeadRefOid githubv4.String
@@ -396,12 +397,16 @@ func commitsFrom(data *graphqlData, repoOwner, repoName string) ([]clients.Commi
 				string(pr.Repository.Name) != repoName {
 				continue
 			}
+			// ResourcePath: e.g., for dependabot, "/apps/dependabot", or "/apps/renovate"
+			// Path that can be appended to "https://github.com" for a Github resource
+			openedByBot := strings.HasPrefix(string(pr.Author.ResourcePath), "/apps/")
 			associatedPR = clients.PullRequest{
 				Number:   int(pr.Number),
 				HeadSHA:  string(pr.HeadRefOid),
 				MergedAt: pr.MergedAt.Time,
 				Author: clients.User{
 					Login: string(pr.Author.Login),
+					IsBot: openedByBot,
 				},
 				MergedBy: clients.User{
 					Login: string(pr.MergedBy.Login),

diff --git a/clients/gitlabrepo/contributors.go b/clients/gitlabrepo/contributors.go
@@ -77,6 +77,7 @@ func (handler *contributorsHandler) setup() error {
 				Companies:        []string{users[0].Organization},
 				NumContributions: contrib.Commits,
 				ID:               int64(users[0].ID),
+				IsBot:            users[0].Bot,
 			}
 			handler.contributors = append(handler.contributors, contributor)
 		}

diff --git a/clients/user.go b/clients/user.go
@@ -21,6 +21,7 @@ type User struct {
 	Organizations    []User
 	NumContributions int
 	ID               int64
+	IsBot            bool
 }
 
 // RepoAssociation is how a user is associated with a repository.

diff --git a/docs/checks.md b/docs/checks.md
@@ -208,13 +208,19 @@ malicious code (either as a malicious contributor or as an attacker who has
 subverted a contributor's account), because a reviewer might detect the
 subversion.
 
-The check first tries to detect whether [Branch-Protection](checks.md#branch-protection) is enabled on the
-default branch with at least one required reviewer. If this fails, the check
-determines whether the most recent (~30) commits have a Github-approved review
+The check determines whether the most recent changes (over the last ~30 commits) have 
+an approval on GitHub
 or if the merger is different from the committer (implicit review). It also
 performs a similar check for reviews using
 [Prow](https://github.com/kubernetes/test-infra/tree/master/prow#readme) (labels
 "lgtm" or "approved") and [Gerrit](https://www.gerritcodereview.com/) ("Reviewed-on" and "Reviewed-by").
+If recent changes are solely bot activity (e.g. dependabot, renovatebot, or custom bots),
+the check returns inconclusively.
+
+Scoring is leveled instead of proportional to make the check more predictable.
+If any bot-originated changes are unreviewed, 3 points are deducted. If any human
+changes are unreviewed, 7 points are deducted if a single change is unreviewed, and
+another 3 are deducted if multiple changes are unreviewed.
 
 Note: Requiring reviews for all changes is infeasible for some projects, such as
 those with only one active participant. Even a project with multiple active

diff --git a/docs/checks/internal/checks.yaml b/docs/checks/internal/checks.yaml
@@ -308,13 +308,19 @@ checks:
       subverted a contributor's account), because a reviewer might detect the
       subversion.
 
-      The check first tries to detect whether [Branch-Protection](checks.md#branch-protection) is enabled on the
-      default branch with at least one required reviewer. If this fails, the check
-      determines whether the most recent (~30) commits have a Github-approved review
+      The check determines whether the most recent changes (over the last ~30 commits) have 
+      an approval on GitHub
       or if the merger is different from the committer (implicit review). It also
       performs a similar check for reviews using
       [Prow](https://github.com/kubernetes/test-infra/tree/master/prow#readme) (labels
       "lgtm" or "approved") and [Gerrit](https://www.gerritcodereview.com/) ("Reviewed-on" and "Reviewed-by").
+      If recent changes are solely bot activity (e.g. dependabot, renovatebot, or custom bots),
+      the check returns inconclusively.
+
+      Scoring is leveled instead of proportional to make the check more predictable.
+      If any bot-originated changes are unreviewed, 3 points are deducted. If any human
+      changes are unreviewed, 7 points are deducted if a single change is unreviewed, and
+      another 3 are deducted if multiple changes are unreviewed.
 
       Note: Requiring reviews for all changes is infeasible for some projects, such as
       those with only one active participant. Even a project with multiple active

diff --git a/e2e/code_review_test.go b/e2e/code_review_test.go
@@ -16,7 +16,6 @@ package e2e
 
 import (
 	"context"
-	"fmt"
 
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
@@ -96,13 +95,54 @@ var _ = Describe("E2E TEST:"+checks.CheckCodeReview, func() {
 			gh := 0
 			for _, cs := range reviewData.DefaultBranchChangesets {
 				if cs.ReviewPlatform == checker.ReviewPlatformGitHub {
-					fmt.Printf("found github revision %s in spring-framework", cs.RevisionID)
 					gh += 1
 				}
 			}
 			Expect(gh).Should(BeNumerically("==", 2))
 
 			Expect(repoClient.Close()).Should(BeNil())
 		})
+		It("Should return inconclusive results for a single-maintainer project with only self- or bot changesets", func() {
+			dl := scut.TestDetailLogger{}
+			repo, err := githubrepo.MakeGithubRepo("Kromey/fast_poisson")
+			Expect(err).Should(BeNil())
+			repoClient := githubrepo.CreateGithubRepoClient(context.Background(), logger)
+			err = repoClient.InitRepo(repo, "bb7b9606690c2b386dc9e2cbe0216d389ed1f078", 0)
+			Expect(err).Should(BeNil())
+
+			req := checker.CheckRequest{
+				Ctx:        context.Background(),
+				RepoClient: repoClient,
+				Repo:       repo,
+				Dlogger:    &dl,
+			}
+			expected := scut.TestReturn{
+				Score: checker.InconclusiveResultScore,
+			}
+			result := checks.CodeReview(&req)
+			Expect(scut.ValidateTestReturn(nil, "use code reviews", &expected, &result, &dl)).Should(BeTrue())
+			Expect(repoClient.Close()).Should(BeNil())
+		})
+		It("Should return minimum score for a single-maintainer project with some unreviewed human changesets", func() {
+			dl := scut.TestDetailLogger{}
+			repo, err := githubrepo.MakeGithubRepo("Kromey/fast_poisson")
+			Expect(err).Should(BeNil())
+			repoClient := githubrepo.CreateGithubRepoClient(context.Background(), logger)
+			err = repoClient.InitRepo(repo, "10aefa7c9a6669ef34e209c3c4b6ad48dd9844e3", 0)
+			Expect(err).Should(BeNil())
+
+			req := checker.CheckRequest{
+				Ctx:        context.Background(),
+				RepoClient: repoClient,
+				Repo:       repo,
+				Dlogger:    &dl,
+			}
+			expected := scut.TestReturn{
+				Score: checker.MinResultScore,
+			}
+			result := checks.CodeReview(&req)
+			Expect(scut.ValidateTestReturn(nil, "use code reviews", &expected, &result, &dl)).Should(BeTrue())
+			Expect(repoClient.Close()).Should(BeNil())
+		})
 	})
 })