Skip to content

Commit

Permalink
feat(extraction): support tree artifacts in bazel extractor (#4932)
Browse files Browse the repository at this point in the history
* feat(extraction): support tree artifacts in bazel extractor

Artifacts in Bazel are usually regular files, but sometimes they can be directories
(tree artifacts): https://docs.bazel.build/versions/master/glossary.html#artifact
In this case the extractor has to recursively include all files in such directories.

The unit test had to be refactored: the extractor now performs file IO, so all
input files need to be initialized by creating empty files.
  • Loading branch information
nbeloglazov committed May 21, 2021
1 parent b235674 commit 147034d
Show file tree
Hide file tree
Showing 5 changed files with 173 additions and 25 deletions.
1 change: 1 addition & 0 deletions kythe/go/extractors/bazel/BUILD
Expand Up @@ -13,6 +13,7 @@ go_library(
],
visibility = [PUBLIC_VISIBILITY],
deps = [
"//kythe/go/extractors/bazel/treeset",
"//kythe/go/platform/kindex",
"//kythe/go/platform/kzip",
"//kythe/go/util/ptypes",
Expand Down
17 changes: 10 additions & 7 deletions kythe/go/extractors/bazel/extractor.go
Expand Up @@ -28,6 +28,7 @@ import (
"sort"
"time"

"kythe.io/kythe/go/extractors/bazel/treeset"
"kythe.io/kythe/go/platform/kzip"
"kythe.io/kythe/go/util/vnameutil"

Expand Down Expand Up @@ -213,8 +214,8 @@ func (c *Config) extract(ctx context.Context, info *ActionInfo, file fileReader)
Argument: info.Arguments,
}

// Capture the primary output path. Although the action has room for
// multiple outputs, we expect only one to be set in practice. It's
// Capture the primary output path. Although the action has room for
// multiple outputs, we expect only one to be set in practice. It's
// harmless if there are more, though, so don't fail for that.
if len(info.Outputs) > 0 {
cu.OutputKey = info.Outputs[0]
Expand All @@ -235,11 +236,11 @@ func (c *Config) extract(ctx context.Context, info *ActionInfo, file fileReader)
log.Printf("ERROR: Adding build details: %v", err)
}

// Load and populate file contents and required inputs. First scan the
// Load and populate file contents and required inputs. First scan the
// inputs and filter out which ones we actually want to keep by path
// inspection; then load the contents concurrently.
sort.Strings(info.Inputs) // ensure a consistent order
inputs := c.classifyInputs(info, cu)
inputs := c.classifyInputs(ctx, info, cu)

start := time.Now()
if err := c.fetchInputs(ctx, inputs, func(i int, r io.Reader) error {
Expand Down Expand Up @@ -337,9 +338,11 @@ func (c *Config) inferCorpus(info *ActionInfo) string {
// classifyInputs updates unit to add required inputs for each matching path
// and to identify source inputs according to the rules of c. The filtered
// complete list of inputs paths is returned.
func (c *Config) classifyInputs(info *ActionInfo, unit *apb.CompilationUnit) []string {
func (c *Config) classifyInputs(ctx context.Context, info *ActionInfo, unit *apb.CompilationUnit) []string {
var inputs, sourceFiles stringset.Set
for _, in := range info.Inputs {
// Inputs might be files or directories (tree artifacts), see
// https://docs.bazel.build/versions/master/glossary.html#artifact
// so we need to expand directories and process only files.
for _, in := range treeset.ExpandDirectories(ctx, info.Inputs) {
path, ok := c.checkInput(in)
if ok {
if !inputs.Add(path) {
Expand All @@ -366,7 +369,7 @@ func (c *Config) classifyInputs(info *ActionInfo, unit *apb.CompilationUnit) []s
c.logPrintf("Excluding input file: %q", in)
}
}
for _, src := range info.Sources {
for _, src := range treeset.ExpandDirectories(ctx, info.Sources) {
if inputs.Contains(src) {
c.logPrintf("Matched source file from action: %q", src)
sourceFiles.Add(src)
Expand Down
63 changes: 45 additions & 18 deletions kythe/go/extractors/bazel/extractor_test.go
Expand Up @@ -53,16 +53,32 @@ const (
wantDigest = emptyDigest
)

var (
// prependFolder returns a new slice in which each element of paths has been
// prefixed with folder and a "/" separator. The order of paths is preserved.
// When no paths are given the result is nil.
func prependFolder(folder string, paths ...string) []string {
	if len(paths) == 0 {
		// Keep the nil result for an empty argument list so that callers
		// comparing with reflect.DeepEqual see the same value as before.
		return nil
	}
	prefixed := make([]string, len(paths))
	for i := range paths {
		prefixed[i] = folder + "/" + paths[i]
	}
	return prefixed
}

// createEmptyFiles creates (or truncates) an empty file with mode 0755 at each
// of the given paths. The parent directory of every path must already exist.
// The test is failed immediately via t.Fatalf on the first write error.
func createEmptyFiles(t *testing.T, paths []string) {
	// Mark this function as a helper so that a failure is reported at the
	// line of the calling test rather than inside this function.
	t.Helper()
	for _, path := range paths {
		if err := ioutil.WriteFile(path, nil, 0755); err != nil {
			t.Fatalf("Creating test file: %v", err)
		}
	}
}

func createActionInfo(folder string) (*ActionInfo, *xapb.SpawnInfo) {
// Gin up an extra action record with some known fields, and make sure the
// extractor handles them correctly.
xa = &xapb.ExtraActionInfo{
xa := &xapb.ExtraActionInfo{
Owner: proto.String(testTarget),
Mnemonic: proto.String("SomeAction"),
}
si = &xapb.SpawnInfo{
si := &xapb.SpawnInfo{
Argument: []string{"cc", "-o", testOutput, "-c", "2.src", "4.src"},
InputFile: []string{"1.dep", "2.src", "3.dep", "1.dep", "4.src"},
InputFile: prependFolder(folder, "1.dep", "2.src", "3.dep", "1.dep", "4.src", "treeArtifact"),
OutputFile: []string{testOutput, "garbage"},
Variable: []*xapb.EnvironmentVariable{{
Name: proto.String("PATH"),
Expand All @@ -72,19 +88,15 @@ var (
Value: proto.String("should not be seen"),
}},
}

ai *ActionInfo
)

func init() {
if err := proto.SetExtension(xa, xapb.E_SpawnInfo_SpawnInfo, si); err != nil {
log.Fatalf("Error setting extension on XA: %v", err)
}
if act, err := SpawnAction(xa); err != nil {
act, err := SpawnAction(xa)
if err != nil {
log.Fatalf("Error generating Spawn action: %v", err)
} else {
ai = act
return nil, si
}
return act, si
}

type results struct {
Expand Down Expand Up @@ -134,13 +146,13 @@ func (r *results) newConfig() *Config {
}
}

func (r *results) checkValues(t *testing.T, cu *apb.CompilationUnit) {
func (r *results) checkValues(t *testing.T, cu *apb.CompilationUnit, si *xapb.SpawnInfo, folder string) {
t.Helper()
// Verify that the info check callback was invoked.
wantInfo := &ActionInfo{ // N.B.: Values prior to filtering!
Target: testTarget,
Arguments: []string{"cc", "-o", testOutput, "-c", "2.src", "4.src"},
Inputs: []string{"1.dep", "1.dep", "2.src", "3.dep", "4.src"},
Inputs: prependFolder(folder, "1.dep", "1.dep", "2.src", "3.dep", "4.src", "treeArtifact"),
Outputs: []string{testOutput, "garbage"},
Environment: map[string]string{
"PATH": "p1:p2",
Expand All @@ -154,8 +166,8 @@ func (r *results) checkValues(t *testing.T, cu *apb.CompilationUnit) {
}

// Verify that the inputs were all passed to the callback.
if !reflect.DeepEqual(r.checkedInputs, si.InputFile) {
t.Errorf("Wrong input files checked:\n got %+q\nwant %+q", r.checkedInputs, si.InputFile)
if want := prependFolder(folder, "1.dep", "1.dep", "2.src", "3.dep", "4.src", "treeArtifact/5.src", "treeArtifact/6.dep"); !reflect.DeepEqual(r.checkedInputs, want) {
t.Errorf("Wrong input files checked:\n got %+q\nwant %+q", r.checkedInputs, want)
}

// Verify that the environment got filtered correctly.
Expand All @@ -174,7 +186,7 @@ func (r *results) checkValues(t *testing.T, cu *apb.CompilationUnit) {
}

// Verify that the identified sources were correctly propagated.
if want := []string{"2.src", "4.src"}; !reflect.DeepEqual(cu.SourceFile, want) {
if want := prependFolder(folder, "2.src", "4.src", "treeArtifact/5.src"); !reflect.DeepEqual(cu.SourceFile, want) {
t.Errorf("Wrong source files:\n got %+q\nwant %+q", cu.SourceFile, want)
}

Expand Down Expand Up @@ -208,8 +220,23 @@ func (r *results) checkValues(t *testing.T, cu *apb.CompilationUnit) {
}

func TestExtractToFile(t *testing.T) {
// Prepare test directory and create src/dep files there.
// We want to test subdirectories as well so creating treeArtifact subdir.
tmp, err := ioutil.TempDir("", "TestExtractToFile")
if err != nil {
t.Fatalf("Error creating temp directory: %v", err)
}
err = os.Mkdir(tmp+"/treeArtifact", 0755)
if err != nil {
t.Fatalf("Error creating treeArfifact directory: %v", err)
}
createEmptyFiles(t,
prependFolder(tmp, "1.dep", "2.src", "3.dep", "4.src", "treeArtifact/5.src", "treeArtifact/6.dep"))
defer os.RemoveAll(tmp) // best effort

res := new(results)
config := res.newConfig()
ai, si := createActionInfo(tmp)

buf := bytes.NewBuffer(nil)
w, err := kzip.NewWriter(buf)
Expand All @@ -227,7 +254,7 @@ func TestExtractToFile(t *testing.T) {

var numUnits int
if err := kzip.Scan(bytes.NewReader(buf.Bytes()), func(_ *kzip.Reader, unit *kzip.Unit) error {
res.checkValues(t, unit.Proto)
res.checkValues(t, unit.Proto, si, tmp)
numUnits++
return nil
}); err != nil {
Expand Down
12 changes: 12 additions & 0 deletions kythe/go/extractors/bazel/treeset/BUILD
@@ -0,0 +1,12 @@
load("//:visibility.bzl", "PUBLIC_VISIBILITY")
load("//tools:build_rules/shims.bzl", "go_library")

# Go library target "treeset", built from treeset.go and publicly visible.
# It depends on the Kythe vfs package and the stringset package.
go_library(
name = "treeset",
srcs = ["treeset.go"],
visibility = [PUBLIC_VISIBILITY],
deps = [
"//kythe/go/platform/vfs",
"@org_bitbucket_creachadair_stringset//:go_default_library",
],
)
105 changes: 105 additions & 0 deletions kythe/go/extractors/bazel/treeset/treeset.go
@@ -0,0 +1,105 @@
/*
* Copyright 2021 The Kythe Authors. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// Package treeset provides functions for extracting targets that use bazel treesets.
package treeset

import (
"context"
"log"
"os"
"path/filepath"

"kythe.io/kythe/go/platform/vfs"

"bitbucket.org/creachadair/stringset"
)

// ListSources returns the set of files denoted by arg. If arg is not a
// directory, the result is a set containing only arg itself. If arg is a
// directory, the result contains the path of every non-directory entry found
// by walking the tree rooted at arg.
//
// An error is returned if arg cannot be stat'ed or if the walk reports an
// error; in either case the returned set is nil.
func ListSources(ctx context.Context, arg string) (stringset.Set, error) {
	fi, err := vfs.Stat(ctx, arg)
	if err != nil {
		return nil, err
	}
	if !fi.IsDir() {
		return stringset.New(arg), nil
	}

	found := stringset.New()
	// collect records every non-directory entry and aborts the walk on the
	// first error reported for any entry.
	collect := func(entryPath string, entry os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if entry.IsDir() {
			return nil
		}
		found.Add(entryPath)
		return nil
	}
	if err := filepath.Walk(arg, collect); err != nil {
		return nil, err
	}
	return found, nil
}

// FindMissingTreeInputs returns the required files that are not explicitly listed in bazel's
// inputs because they were tree artifacts.
//
// Some bazel rules use tree artifacts for the inputs to the compiler. These are directories that
// Bazel expands to files when the action is run. Consequently, the list of inputs that Bazel has
// only contains the tree artifact directory. This function reports the files that are (1) required,
// (2) not included in bazel's inputs, and (3) have one of their ancestor directories included in
// Bazel's inputs.
//
// A required file that is neither an input itself nor located under any input directory is
// omitted from the result and a warning is logged for it.
func FindMissingTreeInputs(inputs []string, requiredFiles stringset.Set) []string {
	missingInputs := stringset.New()
	inputsSet := stringset.New(inputs...)
	for file := range requiredFiles {
		if inputsSet.Contains(file) {
			continue
		}

		// Walk up the ancestor directories of file looking for one that is
		// listed as an input. filepath.Dir never returns an empty string: it
		// converges to "." (relative paths) or the root (absolute paths), so
		// the walk must stop once the parent equals the current directory,
		// otherwise it would loop forever for files without a matching input.
		found := false
		for dir := filepath.Dir(file); ; {
			if inputsSet.Contains(dir) {
				found = true
				break
			}
			parent := filepath.Dir(dir)
			if parent == dir {
				break
			}
			dir = parent
		}

		if found {
			missingInputs.Add(file)
		} else {
			log.Printf("WARNING: couldn't find an input for %s\n", file)
		}
	}
	return missingInputs.Elements()
}

// ExpandDirectories returns the list of files denoted by the provided paths.
//
// Each path may be an individual file or a directory. A file contributes
// itself to the result; a directory contributes all of the files found within
// it, as reported by ListSources. If a path cannot be listed, a warning is
// logged and the path is passed through to the result unchanged.
func ExpandDirectories(ctx context.Context, paths []string) []string {
	var expanded []string
	for _, root := range paths {
		listed, err := ListSources(ctx, root)
		if err != nil {
			// Best effort: keep the original path so the caller still sees it.
			log.Printf("WARNING: couldn't list files for %s: %s\n", root, err)
			expanded = append(expanded, root)
			continue
		}
		expanded = append(expanded, listed.Elements()...)
	}
	return expanded
}

0 comments on commit 147034d

Please sign in to comment.