Skip to content

Commit

Permalink
Add user-agents to http requests sent by Package Analysis (#1018)
Browse files Browse the repository at this point in the history
* Add a simple package for setting the user-agent in request headers.

Signed-off-by: Caleb Brown <calebbrown@google.com>

* Ensure user-agents are set when http requests are made.

Signed-off-by: Caleb Brown <calebbrown@google.com>

* Add "production" to the user agent for the production env.

Signed-off-by: Caleb Brown <calebbrown@google.com>

* Tweak the build command to better handle multi file main packages.

Signed-off-by: Caleb Brown <calebbrown@google.com>

---------

Signed-off-by: Caleb Brown <calebbrown@google.com>
  • Loading branch information
calebbrown committed Feb 21, 2024
1 parent b443fd4 commit f39055d
Show file tree
Hide file tree
Showing 10 changed files with 246 additions and 86 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ jobs:
go-version: '1.21.0'
- name: Install libpcap-dev
run: sudo apt-get install -y libpcap-dev
- run: go build -o scheduler cmd/scheduler/main.go
- run: go build -o worker cmd/worker/main.go
- run: go build -o analyze cmd/analyze/main.go
- run: go build -o scheduler ./cmd/scheduler
- run: go build -o worker ./cmd/worker
- run: go build -o analyze ./cmd/analyze
- run: go build -o loader load.go
working-directory: function/loader
- run: go build -o staticanalyze staticanalyze.go
Expand Down
2 changes: 1 addition & 1 deletion cmd/analyze/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ COPY ./go.sum ./
RUN go mod download

COPY . ./
RUN go build -o analyze cmd/analyze/main.go && go build -o worker cmd/worker/main.go
RUN go build -o analyze ./cmd/analyze && go build -o worker ./cmd/worker

FROM ubuntu:22.04@sha256:42ba2dfce475de1113d55602d40af18415897167d47c2045ec7b6d9746ff148f

Expand Down
4 changes: 4 additions & 0 deletions cmd/analyze/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"flag"
"fmt"
"log/slog"
"net/http"
"os"
"strings"

Expand All @@ -19,6 +20,7 @@ import (
"github.com/ossf/package-analysis/internal/resultstore"
"github.com/ossf/package-analysis/internal/sandbox"
"github.com/ossf/package-analysis/internal/staticanalysis"
"github.com/ossf/package-analysis/internal/useragent"
"github.com/ossf/package-analysis/internal/utils"
"github.com/ossf/package-analysis/internal/worker"
"github.com/ossf/package-analysis/pkg/api/pkgecosystem"
Expand Down Expand Up @@ -186,6 +188,8 @@ func run() error {
analysisMode.InitFlag()
flag.Parse()

http.DefaultTransport = useragent.DefaultRoundTripper(http.DefaultTransport, "")

if err := featureflags.Update(*features); err != nil {
return usageError{err}
}
Expand Down
4 changes: 4 additions & 0 deletions cmd/downloader/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@ import (
"errors"
"flag"
"fmt"
"net/http"
"os"
"strings"

"github.com/package-url/packageurl-go"

"github.com/ossf/package-analysis/internal/useragent"
"github.com/ossf/package-analysis/internal/worker"
)

Expand Down Expand Up @@ -86,6 +88,8 @@ func processFileLine(text string) error {
func run() error {
flag.Parse()

http.DefaultTransport = useragent.DefaultRoundTripper(http.DefaultTransport, "")

if *purlFilePath == "" {
return newCmdError("Please specify packages to download using -f <file>")
}
Expand Down
80 changes: 80 additions & 0 deletions cmd/worker/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package main

import (
"log/slog"
"os"

"github.com/ossf/package-analysis/internal/resultstore"
"github.com/ossf/package-analysis/internal/worker"
)

// resultBucketPaths holds bucket paths for the different types of results.
type resultBucketPaths struct {
analyzedPkg string
dynamicAnalysis string
executionLog string
fileWrites string
staticAnalysis string
}

type sandboxImageSpec struct {
tag string
noPull bool
}

type config struct {
imageSpec sandboxImageSpec

resultStores *worker.ResultStores

subURL string
packagesBucket string
notificationTopicURL string

userAgentExtra string
}

func (c *config) LogValue() slog.Value {
return slog.GroupValue(
slog.String("subscription", c.subURL),
slog.String("package_bucket", c.packagesBucket),
slog.String("dynamic_results_store", c.resultStores.DynamicAnalysis.String()),
slog.String("static_results_store", c.resultStores.StaticAnalysis.String()),
slog.String("file_write_results_store", c.resultStores.FileWrites.String()),
slog.String("analyzed_packages_store", c.resultStores.AnalyzedPackage.String()),
slog.String("execution_log_store", c.resultStores.ExecutionLog.String()),
slog.String("image_tag", c.imageSpec.tag),
slog.Bool("image_nopull", c.imageSpec.noPull),
slog.String("topic_notification", c.notificationTopicURL),
slog.String("user_agent_extra", c.userAgentExtra),
)
}

func resultStoreForEnv(key string) *resultstore.ResultStore {
val := os.Getenv(key)
if val == "" {
return nil
}
return resultstore.New(val, resultstore.ConstructPath())
}

func configFromEnv() *config {
return &config{
imageSpec: sandboxImageSpec{
tag: os.Getenv("OSSF_SANDBOX_IMAGE_TAG"),
noPull: os.Getenv("OSSF_SANDBOX_NOPULL") != "",
},
resultStores: &worker.ResultStores{
AnalyzedPackage: resultStoreForEnv("OSSF_MALWARE_ANALYZED_PACKAGES"),
DynamicAnalysis: resultStoreForEnv("OSSF_MALWARE_ANALYSIS_RESULTS"),
ExecutionLog: resultStoreForEnv("OSSF_MALWARE_ANALYSIS_EXECUTION_LOGS"),
FileWrites: resultStoreForEnv("OSSF_MALWARE_ANALYSIS_FILE_WRITE_RESULTS"),
StaticAnalysis: resultStoreForEnv("OSSF_MALWARE_STATIC_ANALYSIS_RESULTS"),
},
subURL: os.Getenv("OSSMALWARE_WORKER_SUBSCRIPTION"),
packagesBucket: os.Getenv("OSSF_MALWARE_ANALYSIS_PACKAGES"),
notificationTopicURL: os.Getenv("OSSF_MALWARE_NOTIFICATION_TOPIC"),

userAgentExtra: os.Getenv("OSSF_MALWARE_USER_AGENT_EXTRA"),
}
}
108 changes: 27 additions & 81 deletions cmd/worker/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ import (
"github.com/ossf/package-analysis/internal/log"
"github.com/ossf/package-analysis/internal/notification"
"github.com/ossf/package-analysis/internal/pkgmanager"
"github.com/ossf/package-analysis/internal/resultstore"
"github.com/ossf/package-analysis/internal/sandbox"
"github.com/ossf/package-analysis/internal/staticanalysis"
"github.com/ossf/package-analysis/internal/useragent"
"github.com/ossf/package-analysis/internal/worker"
"github.com/ossf/package-analysis/pkg/api/pkgecosystem"
)
Expand All @@ -35,20 +35,6 @@ const (
localPkgPathFmt = "/local/%s"
)

// resultBucketPaths holds bucket paths for the different types of results.
type resultBucketPaths struct {
analyzedPkg string
dynamicAnalysis string
executionLog string
fileWrites string
staticAnalysis string
}

type sandboxImageSpec struct {
tag string
noPull bool
}

func copyPackageToLocalFile(ctx context.Context, packagesBucket *blob.Bucket, bucketPath string) (string, *os.File, error) {
if packagesBucket == nil {
return "", nil, errors.New("packages bucket not set")
Expand Down Expand Up @@ -77,29 +63,7 @@ func copyPackageToLocalFile(ctx context.Context, packagesBucket *blob.Bucket, bu
return fmt.Sprintf(localPkgPathFmt, path.Base(bucketPath)), f, nil
}

func makeResultStores(dest resultBucketPaths) worker.ResultStores {
resultStores := worker.ResultStores{}

if dest.analyzedPkg != "" {
resultStores.AnalyzedPackage = resultstore.New(dest.analyzedPkg, resultstore.ConstructPath())
}
if dest.dynamicAnalysis != "" {
resultStores.DynamicAnalysis = resultstore.New(dest.dynamicAnalysis, resultstore.ConstructPath())
}
if dest.executionLog != "" {
resultStores.ExecutionLog = resultstore.New(dest.executionLog, resultstore.ConstructPath())
}
if dest.fileWrites != "" {
resultStores.FileWrites = resultstore.New(dest.fileWrites, resultstore.ConstructPath())
}
if dest.staticAnalysis != "" {
resultStores.StaticAnalysis = resultstore.New(dest.staticAnalysis, resultstore.ConstructPath())
}

return resultStores
}

func handleMessage(ctx context.Context, msg *pubsub.Message, packagesBucket *blob.Bucket, resultStores *worker.ResultStores, imageSpec sandboxImageSpec, notificationTopic *pubsub.Topic) error {
func handleMessage(ctx context.Context, msg *pubsub.Message, cfg *config, packagesBucket *blob.Bucket, notificationTopic *pubsub.Topic) error {
name := msg.Metadata["name"]
if name == "" {
slog.WarnContext(ctx, "name is empty")
Expand Down Expand Up @@ -132,7 +96,7 @@ func handleMessage(ctx context.Context, msg *pubsub.Message, packagesBucket *blo
)

localPkgPath := ""
sandboxOpts := []sandbox.Option{sandbox.Tag(imageSpec.tag)}
sandboxOpts := []sandbox.Option{sandbox.Tag(cfg.imageSpec.tag)}

if remotePkgPath != "" {
tmpPkgPath, pkgFile, err := copyPackageToLocalFile(ctx, packagesBucket, remotePkgPath)
Expand All @@ -146,7 +110,7 @@ func handleMessage(ctx context.Context, msg *pubsub.Message, packagesBucket *blo
sandboxOpts = append(sandboxOpts, sandbox.Volume(pkgFile.Name(), localPkgPath))
}

if imageSpec.noPull {
if cfg.imageSpec.noPull {
sandboxOpts = append(sandboxOpts, sandbox.NoPull())
}

Expand All @@ -159,19 +123,24 @@ func handleMessage(ctx context.Context, msg *pubsub.Message, packagesBucket *blo
staticSandboxOpts := append(worker.StaticSandboxOptions(), sandboxOpts...)
dynamicSandboxOpts := append(worker.DynamicSandboxOptions(), sandboxOpts...)

// propogate user agent extras to the static analysis sandbox if it is set.
if cfg.userAgentExtra != "" {
staticSandboxOpts = append(staticSandboxOpts, sandbox.SetEnv("OSSF_MALWARE_USER_AGENT_EXTRA", cfg.userAgentExtra))
}

// run both dynamic and static analysis regardless of error status of either
// and return combined error(s) afterwards, if applicable
staticResults, _, staticAnalysisErr := worker.RunStaticAnalysis(ctx, pkg, staticSandboxOpts, staticanalysis.All)
if staticAnalysisErr == nil {
staticAnalysisErr = worker.SaveStaticAnalysisData(ctx, pkg, resultStores, staticResults)
staticAnalysisErr = worker.SaveStaticAnalysisData(ctx, pkg, cfg.resultStores, staticResults)
}

result, dynamicAnalysisErr := worker.RunDynamicAnalysis(ctx, pkg, dynamicSandboxOpts, "")
if dynamicAnalysisErr == nil {
dynamicAnalysisErr = worker.SaveDynamicAnalysisData(ctx, pkg, resultStores, result.Data)
dynamicAnalysisErr = worker.SaveDynamicAnalysisData(ctx, pkg, cfg.resultStores, result.Data)
}

resultStores.AnalyzedPackageSaved = false
cfg.resultStores.AnalyzedPackageSaved = false

// combine errors
if analysisErr := errors.Join(dynamicAnalysisErr, staticAnalysisErr); analysisErr != nil {
Expand All @@ -187,12 +156,12 @@ func handleMessage(ctx context.Context, msg *pubsub.Message, packagesBucket *blo
return nil
}

func messageLoop(ctx context.Context, subURL, packagesBucket, notificationTopicURL string, imageSpec sandboxImageSpec, resultsBuckets *worker.ResultStores) error {
sub, err := pubsub.OpenSubscription(ctx, subURL)
func messageLoop(ctx context.Context, cfg *config) error {
sub, err := pubsub.OpenSubscription(ctx, cfg.subURL)
if err != nil {
return err
}
extender, err := pubsubextender.New(ctx, subURL, sub)
extender, err := pubsubextender.New(ctx, cfg.subURL, sub)
if err != nil {
return err
}
Expand All @@ -205,18 +174,18 @@ func messageLoop(ctx context.Context, subURL, packagesBucket, notificationTopicU
// we pass in a nil notificationTopic object to handleMessage
// and continue with the analysis with no notifications published
var notificationTopic *pubsub.Topic
if notificationTopicURL != "" {
notificationTopic, err = pubsub.OpenTopic(ctx, notificationTopicURL)
if cfg.notificationTopicURL != "" {
notificationTopic, err = pubsub.OpenTopic(ctx, cfg.notificationTopicURL)
if err != nil {
return err
}
defer notificationTopic.Shutdown(ctx)
}

var pkgsBkt *blob.Bucket
if packagesBucket != "" {
if cfg.packagesBucket != "" {
var err error
pkgsBkt, err = blob.OpenBucket(ctx, packagesBucket)
pkgsBkt, err = blob.OpenBucket(ctx, cfg.packagesBucket)
if err != nil {
return err
}
Expand Down Expand Up @@ -246,7 +215,7 @@ func messageLoop(ctx context.Context, subURL, packagesBucket, notificationTopicU
return fmt.Errorf("error starting message ack deadline extender: %w", err)
}

if err := handleMessage(msgCtx, msg, pkgsBkt, resultsBuckets, imageSpec, notificationTopic); err != nil {
if err := handleMessage(msgCtx, msg, cfg, pkgsBkt, notificationTopic); err != nil {
slog.ErrorContext(msgCtx, "Failed to process message", "error", err)
if err := me.Stop(); err != nil {
slog.ErrorContext(msgCtx, "Extender failed", "error", err)
Expand All @@ -267,35 +236,21 @@ func main() {
log.Initialize(os.Getenv("LOGGER_ENV"))

ctx := context.Background()
subURL := os.Getenv("OSSMALWARE_WORKER_SUBSCRIPTION")
packagesBucket := os.Getenv("OSSF_MALWARE_ANALYSIS_PACKAGES")
notificationTopicURL := os.Getenv("OSSF_MALWARE_NOTIFICATION_TOPIC")
enableProfiler := os.Getenv("OSSF_MALWARE_ANALYSIS_ENABLE_PROFILER")

cfg := configFromEnv()

http.DefaultTransport = useragent.DefaultRoundTripper(http.DefaultTransport, cfg.userAgentExtra)

if err := featureflags.Update(os.Getenv("OSSF_MALWARE_FEATURE_FLAGS")); err != nil {
slog.Error("Failed to parse feature flags", "error", err)
os.Exit(1)
}

resultsBuckets := resultBucketPaths{
analyzedPkg: os.Getenv("OSSF_MALWARE_ANALYZED_PACKAGES"),
dynamicAnalysis: os.Getenv("OSSF_MALWARE_ANALYSIS_RESULTS"),
executionLog: os.Getenv("OSSF_MALWARE_ANALYSIS_EXECUTION_LOGS"),
fileWrites: os.Getenv("OSSF_MALWARE_ANALYSIS_FILE_WRITE_RESULTS"),
staticAnalysis: os.Getenv("OSSF_MALWARE_STATIC_ANALYSIS_RESULTS"),
}
resultStores := makeResultStores(resultsBuckets)

imageSpec := sandboxImageSpec{
tag: os.Getenv("OSSF_SANDBOX_IMAGE_TAG"),
noPull: os.Getenv("OSSF_SANDBOX_NOPULL") != "",
}

sandbox.InitNetwork(ctx)

// If configured, start a webserver so that Go's pprof can be accessed for
// debugging and profiling.
if enableProfiler != "" {
if os.Getenv("OSSF_MALWARE_ANALYSIS_ENABLE_PROFILER") != "" {
go func() {
slog.Info("Starting profiler")
http.ListenAndServe(":6060", nil)
Expand All @@ -304,20 +259,11 @@ func main() {

// Log the configuration of the worker at startup so we can observe it.
slog.InfoContext(ctx, "Starting worker",
"subscription", subURL,
"package_bucket", packagesBucket,
"results_bucket", resultsBuckets.dynamicAnalysis,
"static_results_bucket", resultsBuckets.staticAnalysis,
"file_write_results_bucket", resultsBuckets.fileWrites,
"analyzed_packages_bucket", resultsBuckets.analyzedPkg,
"execution_log_bucket", resultsBuckets.executionLog,
"image_tag", imageSpec.tag,
"image_nopull", imageSpec.noPull,
"topic_notification", notificationTopicURL,
"config", cfg,
"feature_flags", featureflags.State(),
)

err := messageLoop(ctx, subURL, packagesBucket, notificationTopicURL, imageSpec, &resultStores)
err := messageLoop(ctx, cfg)
if err != nil {
slog.ErrorContext(ctx, "Error encountered", "error", err)
}
Expand Down
2 changes: 2 additions & 0 deletions infra/worker/workers-set.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ spec:
value: gs://ossf-malware-analysis-packages
- name: OSSF_MALWARE_NOTIFICATION_TOPIC
value: gcppubsub://projects/ossf-malware-analysis/topics/analysis-notify
- name: OSSF_MALWARE_USER_AGENT_EXTRA
value: "production"
- name: OSSF_MALWARE_FEATURE_FLAGS
value: "CodeExecution"
securityContext:
Expand Down

0 comments on commit f39055d

Please sign in to comment.