Skip to content

Commit

Permalink
Generate .shard_metadata file in cron job shard (#814)
Browse files Browse the repository at this point in the history
Co-authored-by: Azeem Shaikh <azeems@google.com>
  • Loading branch information
azeemshaikh38 and azeemsgoogle authored Aug 6, 2021
1 parent d58fd2d commit 7f71928
Show file tree
Hide file tree
Showing 9 changed files with 252 additions and 5 deletions.
9 changes: 6 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,11 @@ build: ## Build all binaries and images in the repo.
build: $(build-targets)

build-proto: ## Compiles and generates all required protobufs
build-proto: cron/data/request.pb.go
build-proto: cron/data/request.pb.go cron/data/metadata.pb.go
cron/data/request.pb.go: cron/data/request.proto | $(PROTOC)
protoc --go_out=../../../ cron/data/request.proto
cron/data/metadata.pb.go: cron/data/metadata.proto | $(PROTOC)
protoc --go_out=../../../ cron/data/metadata.proto

generate-docs: ## Generates docs
generate-docs: docs/checks.md
Expand Down Expand Up @@ -152,8 +154,9 @@ dockerbuild: ## Runs docker build
# Build all Docker images in the Repo
$(call ndef, GITHUB_AUTH_TOKEN)
DOCKER_BUILDKIT=1 docker build . --file Dockerfile --tag $(IMAGE_NAME)
DOCKER_BUILDKIT=1 docker build . --file cron/controller/Dockerfile --tag $(IMAGE_NAME)-batch-controller
DOCKER_BUILDKIT=1 docker build . --file cron/worker/Dockerfile --tag $(IMAGE_NAME)-batch-worker
DOCKER_BUILDKIT=1 docker build . --file cron/controller/Dockerfile \
--build-arg=COMMIT_SHA=$(GIT_HASH) --tag $(IMAGE_NAME)-batch-controller
DOCKER_BUILDKIT=1 docker build . --file cron/worker/Dockerfile --tag $(IMAGE_NAME)-batch-worker
DOCKER_BUILDKIT=1 docker build . --file cron/bq/Dockerfile --tag $(IMAGE_NAME)-bq-transfer
###############################################################################

Expand Down
3 changes: 3 additions & 0 deletions cron/bq/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ func getBucketSummary(ctx context.Context, bucketURL string) (*bucketSummary, er
summary.getOrCreate(creationTime).shardsCreated++
case filename == config.TransferStatusFilename:
summary.getOrCreate(creationTime).isTransferred = true
case filename == config.ShardMetadataFilename:
// TODO(azeems): Handle shard_metadata file.
continue
default:
// nolint: goerr113
return nil, fmt.Errorf("found unrecognized file: %s", key)
Expand Down
1 change: 1 addition & 0 deletions cron/cloudbuild/controller.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
steps:
- name: 'gcr.io/cloud-builders/docker'
args: ['build', '.',
'--build-arg', 'COMMIT_SHA=$COMMIT_SHA',
'-t', 'gcr.io/openssf/scorecard-batch-controller:$COMMIT_SHA',
'-t', 'gcr.io/openssf/scorecard-batch-controller:latest',
'-f', 'cron/controller/Dockerfile']
Expand Down
2 changes: 2 additions & 0 deletions cron/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ import (
)

const (
// ShardMetadataFilename file contains metadata for the created shard.
ShardMetadataFilename string = ".shard_metadata"
// ShardNumFilename is the name of the file that stores the number of shards.
ShardNumFilename string = ".shard_num"
// TransferStatusFilename file identifies if shard transfer to BigQuery is completed.
Expand Down
2 changes: 2 additions & 0 deletions cron/controller/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ ARG TARGETARCH
RUN CGO_ENABLED=0 make build-pubsub

# Runtime image for the controller: distroless "nonroot" base, pinned by digest.
FROM gcr.io/distroless/base:nonroot@sha256:bc84925113289d139a9ef2f309f0dd7ac46ea7b786f172ba9084ffdb4cbd9490
# COMMIT_SHA is a build argument (supplied with --build-arg); its value is
# exported inside the container as the SCORECARD_COMMIT_SHA environment variable.
# If the argument is not supplied, the variable is set to an empty string.
ARG COMMIT_SHA
ENV SCORECARD_COMMIT_SHA=${COMMIT_SHA}
# Copy the project CSV file(s) and the controller binary taken from the
# `pubsub` build stage, then run the controller as the container entrypoint.
COPY ./cron/data/projects*csv cron/data/
COPY --from=pubsub /src/cron/controller/controller cron/controller/controller
ENTRYPOINT ["cron/controller/controller"]
26 changes: 24 additions & 2 deletions cron/controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,16 @@ import (
"strconv"
"time"

"google.golang.org/protobuf/encoding/protojson"
"google.golang.org/protobuf/types/known/timestamppb"

"github.com/ossf/scorecard/v2/cron/config"
"github.com/ossf/scorecard/v2/cron/data"
"github.com/ossf/scorecard/v2/cron/pubsub"
)

const commitSHA = "SCORECARD_COMMIT_SHA"

func publishToRepoRequestTopic(ctx context.Context, iter data.Iterator, datetime time.Time) (int32, error) {
var shardNum int32
request := data.ScorecardBatchRequest{
Expand Down Expand Up @@ -99,18 +102,37 @@ func main() {
panic(err)
}

shardNum, err := publishToRepoRequestTopic(ctx, reader, t)
bucket, err := config.GetResultDataBucketURL()
if err != nil {
panic(err)
}
bucket, err := config.GetResultDataBucketURL()

shardNum, err := publishToRepoRequestTopic(ctx, reader, t)
if err != nil {
panic(err)
}
// TODO(azeems): Stop populating `.shard_num` file.
err = data.WriteToBlobStore(ctx, bucket,
data.GetShardNumFilename(t),
[]byte(strconv.Itoa(int(shardNum+1))))
if err != nil {
panic(err)
}
// Populate `.shard_metadata` file.
metadata := data.ShardMetadata{
NumShard: new(int32),
ShardLoc: new(string),
CommitSha: new(string),
}
*metadata.NumShard = (shardNum + 1)
*metadata.ShardLoc = bucket + "/" + data.GetBlobFilename("", t)
*metadata.CommitSha = os.Getenv(commitSHA)
metadataJSON, err := protojson.Marshal(&metadata)
if err != nil {
panic(fmt.Errorf("error during protojson.Marshal: %w", err))
}
err = data.WriteToBlobStore(ctx, bucket, data.GetShardMetadataFilename(t), metadataJSON)
if err != nil {
panic(fmt.Errorf("error writing to BlobStore: %w", err))
}
}
5 changes: 5 additions & 0 deletions cron/data/blob.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,11 @@ func GetTransferStatusFilename(datetime time.Time) string {
return GetBlobFilename(config.TransferStatusFilename, datetime)
}

// GetShardMetadataFilename returns the name of the shard metadata file
// (config.ShardMetadataFilename) for the shard identified by datetime.
// The name is built by GetBlobFilename.
func GetShardMetadataFilename(datetime time.Time) string {
	metadataFilename := GetBlobFilename(config.ShardMetadataFilename, datetime)
	return metadataFilename
}

// ParseBlobFilename parses a blob key into a Time object.
func ParseBlobFilename(key string) (time.Time, string, error) {
if len(key) < len(filePrefixFormat) {
Expand Down
184 changes: 184 additions & 0 deletions cron/data/metadata.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions cron/data/metadata.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Copyright 2021 Security Scorecard Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package ossf.scorecard.cron.data;

option go_package = "github.com/ossf/scorecard/cron/data";

// ShardMetadata holds metadata about a created shard. The cron controller
// marshals it with protojson and writes it to the `.shard_metadata` file.
message ShardMetadata {
// Location of the shard; the controller sets it to the result bucket URL
// followed by "/" and the shard's blob filename prefix.
optional string shard_loc = 1;
// Number of shards; the controller sets it to the last shard number plus one.
optional int32 num_shard = 2;
// Commit SHA; the controller reads it from the SCORECARD_COMMIT_SHA
// environment variable (empty when the variable is unset).
optional string commit_sha = 3;
}

0 comments on commit 7f71928

Please sign in to comment.