From b20437c55c1eb8a9d7291b122ea1bb05fc5f0702 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Thu, 20 Oct 2022 14:15:32 -0400 Subject: [PATCH 01/44] WIP Signed-off-by: Mark Lodato --- docs/_data/versions.yml | 3 + docs/provenance/examples.md | 32 ++ docs/provenance/v1.0.md | 700 ++++++++++++++++++++++++++++++++++++ docs/provenance/v1.0.proto | 8 + 4 files changed, 743 insertions(+) create mode 100644 docs/provenance/examples.md create mode 100644 docs/provenance/v1.0.md create mode 100644 docs/provenance/v1.0.proto diff --git a/docs/_data/versions.yml b/docs/_data/versions.yml index 151a68909..da9c0a04f 100644 --- a/docs/_data/versions.yml +++ b/docs/_data/versions.yml @@ -29,6 +29,9 @@ provenance: name: Version 0.1 v0.2: name: Version 0.2 + v1.0: + name: Version 1.0 (DRAFT) + draft: true current: v0.2 verification_summary: diff --git a/docs/provenance/examples.md b/docs/provenance/examples.md new file mode 100644 index 000000000..777cb8c64 --- /dev/null +++ b/docs/provenance/examples.md @@ -0,0 +1,32 @@ +## GitHub Actions + +```jsonc +"buildDefinition": { + "type": "github-actions-workflow", + "configSource": { + "uri": "https://github.com/MarkLodato/myproject", + "digest": { "sha1": "..." 
}, + "entryPoint": ".github/workflows/build.yml" + }, + "parameters": { /* object */ }, + "materials": [ + { + "uri": "", + "digest": { /* DigestSet */ } + } + ] +}, +"instanceMetadata": { + "builder": { + "service": "github-actions", + "tenantProject": "", + }, + "invocationId": "", + "startedOn": "", + "finishedOn": "", + "environment": { /* object */ }, // TODO: feels off + "evaluatedConfig": { + "digest": { /* DigestSet */ } + } +}, +``` diff --git a/docs/provenance/v1.0.md b/docs/provenance/v1.0.md new file mode 100644 index 000000000..92fddc932 --- /dev/null +++ b/docs/provenance/v1.0.md @@ -0,0 +1,700 @@ +--- +title: Provenance +layout: standard +hero_text: To trace software back to the source and define the moving parts in a complex supply chain, provenance needs to be there from the very beginning. It’s the verifiable information about software artifacts describing where, when and how something was produced. For higher SLSA levels and more resilient integrity guarantees, provenance requirements are stricter and need a deeper, more technical understanding of the predicate. +--- + +
+ +## Purpose + +
+ +Describe how an artifact or set of artifacts was produced. + +This predicate is the recommended way to satisfy the SLSA [provenance +requirements]. + +
+ +## Prerequisite + +
+ +Understanding of SLSA [Software Attestations](/attestation-model) +and the larger [in-toto attestation] framework. + +
+ +## Model + +
+ +Provenance is an attestation that some entity (`builder`) produced one or more +software artifacts (the `subject` of an in-toto attestation [Statement]) by +executing some `invocation`, using some other artifacts as input (`materials`). +The invocation in turn runs the `buildConfig`, which is a record of what was +executed. The builder is trusted to have faithfully recorded the provenance; +there is no option but to trust the builder. However, the builder may have +performed this operation at the request of some external, possibly untrusted +entity. These untrusted parameters are captured in the invocation's `parameters` +and some of the `materials`. Finally, the build may have depended on various +environmental parameters (`environment`) that are needed for +[reproducing][reproducible] the build but that are not under external control. + +See [Example](#example) for a concrete example. + +![Model Diagram](../images/provenance/v0.2/provenance.svg) + +
+ +## Schema + +
+ +```jsonc +{ + // Standard attestation fields: + "_type": "https://in-toto.io/Statement/v0.1", + "subject": [{ /*...*/ }], + + // Predicate: + "predicateType": "https://slsa.dev/provenance/v1.0", + "predicate": { + "buildDefinition": { + "type": "", + "configSource": { + "uri": "", + "digest": { /* DigestSet */ }, + "entryPoint": "" + }, + "parameters": { /* object */ }, + "environment": { /* object */ }, // TODO: feels off + "materials": [ + { + "uri": "", + "digest": { /* DigestSet */ } + } + ] + }, + "instanceMetadata": { + "builder": { + "service": "", + "tenantProject": "", + }, + "invocationId": "", + "startedOn": "", + "finishedOn": "", + "evaluatedConfig": { + "digest": { /* DigestSet */ } + } + }, + } +} +``` + +
+ +### Parsing rules + +
+ +This predicate follows the in-toto attestation [parsing rules]. Summary: + +- Consumers MUST ignore unrecognized fields. +- The `predicateType` URI includes the major version number and will always + change whenever there is a backwards incompatible change. +- Minor version changes are always backwards compatible and "monotonic." Such + changes do not update the `predicateType`. +- Producers MAY add extension fields using field names that are URIs. +- Optional fields MAY be unset or null, and should be treated equivalently. + Both are equivalent to empty for _object_ or _array_ values. + +
+ +### Fields + +
+ +_NOTE: This section describes the fields within `predicate`. For a description +of the other top-level fields, such as `subject`, see [Statement]._ + + +`builder` _object, required_ + +> Identifies the entity that executed the invocation, which is trusted to have +> correctly performed the operation and populated this provenance. +> +> The identity MUST reflect the trust base that consumers care about. How +> detailed to be is a judgement call. For example, [GitHub Actions] supports +> both GitHub-hosted runners and self-hosted runners. The GitHub-hosted runner +> might be a single identity because it's all GitHub from the consumer's +> perspective. Meanwhile, each self-hosted runner might have its own identity +> because not all runners are trusted by all consumers. +> +> Consumers MUST accept only specific (signer, builder) pairs. For example, +> "GitHub" can sign provenance for the "GitHub Actions" builder, and "Google" +> can sign provenance for the "Google Cloud Build" builder, but "GitHub" cannot +> sign for the "Google Cloud Build" builder. +> +> Design rationale: The builder is distinct from the signer because one signer +> may generate attestations for more than one builder, as in the GitHub Actions +> example above. The field is required, even if it is implicit from the signer, +> to aid readability and debugging. It is an object to allow additional fields +> in the future, in case one URI is not sufficient. + + +`builder.id` _string ([TypeURI]), required_ + +> URI indicating the builder's identity. + + +`buildType` _string ([TypeURI]), required_ + +> URI indicating what type of build was performed. It determines the meaning of +> `invocation`, `buildConfig` and `materials`. + + +`invocation` _object, optional_ + +> Identifies the event that kicked off the build. When combined with +> `materials`, this SHOULD fully describe the build, such that re-running this +> invocation results in bit-for-bit identical output (if the build is +> [reproducible]). 
+> +> MAY be unset/null if unknown, but this is DISCOURAGED. + + +`invocation.configSource` _object, optional_ + +> Describes where the config file that kicked off the build came from. +> This is effectively a pointer to the source where `buildConfig` came from. + + +`invocation.configSource.uri` _string ([ResourceURI]), optional_ + +> URI indicating the identity of the source of the config. + + +`invocation.configSource.digest` _object ([DigestSet]), optional_ + +> Collection of cryptographic digests for the contents of the artifact +> specified by `invocation.configSource.uri`. + + +`invocation.configSource.entryPoint` _string, optional_ + +> String identifying the entry point into the build. This is often a path to a +> configuration file and/or a target label within that file. The syntax and +> meaning are defined by `buildType`. For example, if the `buildType` were +> "make", then this would reference the directory in which to run `make` as well +> as which target to use. +> +> Consumers SHOULD accept only specific `invocation.entryPoint` values. For example, +> a policy might only allow the "release" entry point but not the "debug" entry +> point. +> +> MAY be omitted if the `buildType` specifies a default value. +> +> Design rationale: The `entryPoint` is distinct from `parameters` to make it +> easier to write secure policies without having to parse `parameters`. + + +`invocation.parameters` _object, optional_ + +> Collection of all external inputs that influenced the build on top of +> `invocation.configSource`. For example, if the invocation +> type were "make", then this might be the flags passed to `make` aside from the +> target, which is captured in `invocation.configSource.entryPoint`. +> +> Consumers SHOULD accept only "safe" `invocation.parameters`. The simplest and +> safest way to achieve this is to disallow any `parameters` altogether. +> +> This is an arbitrary JSON object with a schema defined by `buildType`. 
+> +> This is considered to be incomplete unless `metadata.completeness.parameters` +> is true. + + +`invocation.environment` _object, optional_ + +> Any other builder-controlled inputs necessary for correctly evaluating the +> build. Usually only needed for [reproducing][reproducible] the build but not +> evaluated as part of policy. +> +> This SHOULD be minimized to only include things that are part of the public +> API, that cannot be recomputed from other values in the provenance, and that +> actually affect the evaluation of the build. For example, this might include +> variables that are referenced in the workflow definition, but it SHOULD NOT +> include a dump of all environment variables or include things like the +> hostname (assuming hostname is not part of the public API). +> +> This is an arbitrary JSON object with a schema defined by `buildType`. +> +> This is considered to be incomplete unless `metadata.completeness.environment` +> is true. + + +`metadata` _object, optional_ + +> Other properties of the build. + + +`metadata.buildInvocationId` _string, optional_ + +> Identifies this particular build invocation, which can be useful for finding +> associated logs or other ad-hoc analysis. The exact meaning and format is +> defined by `builder.id`; by default it is treated as opaque and +> case-sensitive. The value SHOULD be globally unique. + + +`metadata.buildStartedOn` _string ([Timestamp]), optional_ + +> The timestamp of when the build started. + + +`metadata.buildFinishedOn` _string ([Timestamp]), optional_ + +> The timestamp of when the build completed. + + +`metadata.completeness` _object, optional_ + +> Indicates that the `builder` claims certain fields in this message to be +> complete. + + +`metadata.completeness.parameters` _boolean, optional_ + +> If true, the `builder` claims that `invocation.parameters` is complete, meaning +> that all external inputs are propertly captured in `invocation.parameters`. 
+ + +`metadata.completeness.environment` _boolean, optional_ + +> If true, the `builder` claims that `invocation.environment` is complete. + + +`metadata.completeness.materials` _boolean, optional_ + +> If true, the `builder` claims that `materials` is complete, usually through +> some controls to prevent network access. Sometimes called "hermetic". + + +`metadata.reproducible` _boolean, optional_ + +> If true, the `builder` claims that running `invocation` on `materials` will +> produce bit-for-bit identical output. + + +`buildConfig` _object, optional_ + +> Lists the steps in the build. +> If `invocation.configSource` is not available, `buildConfig` can be used +> to verify information about the build. +> +> This is an arbitrary JSON object with a schema defined by `buildType`. + + +`materials` _array of objects, optional_ + +> The collection of artifacts that influenced the build including sources, +> dependencies, build tools, base images, and so on. +> +> This is considered to be incomplete unless `metadata.completeness.materials` +> is true. + + +`materials[*].uri` _string ([ResourceURI]), optional_ + +> The method by which this artifact was referenced during the build. +> +> TODO: Should we differentiate between the "referenced" URI and the "resolved" +> URI, e.g. "latest" vs "3.4.1"? +> +> TODO: Should wrap in a `locator` object to allow for extensibility, in case we +> add other types of URIs or other non-URI locators? + + +`materials[*].digest` _object ([DigestSet]), optional_ + +> Collection of cryptographic digests for the contents of this artifact. + +
+ +## Example + +
+ +WARNING: This is just for demonstration purposes. + +Suppose the builder downloaded `example-1.2.3.tar.gz`, extracted it, and ran +`make -C src foo CFLAGS=-O3`, resulting in a file with hash `5678...`. Then the +provenance might look like this: + +```jsonc +{ + "_type": "https://in-toto.io/Statement/v0.1", + // Output file; name is "_" to indicate "not important". + "subject": [{"name": "_", "digest": {"sha256": "5678..."}}], + "predicateType": "https://slsa.dev/provenance/v1.0", + "predicate": { + "buildType": "https://example.com/Makefile", + "builder": { "id": "mailto:person@example.com" }, + "invocation": { + "configSource": { + "uri": "https://example.com/example-1.2.3.tar.gz", + "digest": {"sha256": "1234..."}, + "entryPoint": "src:foo", // target "foo" in directory "src" + }, + "parameters": {"CFLAGS": "-O3"} // extra args to `make` + }, + "materials": [{ + "uri": "https://example.com/example-1.2.3.tar.gz", + "digest": {"sha256": "1234..."} + }] + } +} +``` + +
+ +## More examples + +
+ +
+ +### GitHub Actions + +
+ +WARNING: This is only for demonstration purposes. The GitHub Actions team has +not yet reviewed or approved this design, and it is not yet implemented. Details +are subject to change! + +If GitHub is the one to generate provenance, and the runner is GitHub-hosted, +then the builder would be as follows: + +```json +"builder": { + "id": "https://github.com/Attestations/GitHubHostedActions@v1" +} +``` + +Self-hosted runner: Not yet supported. We need to figure out a URI scheme that +represents what system hosted the runner, or perhaps add additional properties +in `builder`. + +
+ +#### GitHub Actions Workflow + +
+ +```jsonc +"buildType": "https://github.com/Attestations/GitHubActionsWorkflow@v1", +"invocation": { + "configSource": { + "entryPoint": "build.yaml:build", + // The git repo that contains the build.yaml referenced in the entrypoint. + "uri": "git+https://github.com/foo/bar.git", + // The resolved git commit hash reflecting the version of the repo used + // for this build. + "digest": {"sha1": "abc..."} + }, + // The only possible user-defined parameters that can affect the build are the + // "inputs" to a workflow_dispatch event. This is unset/null for all other + // events. + "parameters": { + "inputs": { ... } + }, + // Other variables that are required to reproduce the build and that cannot be + // recomputed using existing information. (Documentation would explain how to + // recompute the rest of the fields.) + "environment": { + // The architecture of the runner. + "arch": "amd64", + // Environment variables. These are always set because it is not possible + // to know whether they were referenced or not. + "env": { + "GITHUB_RUN_ID": "1234", + "GITHUB_RUN_NUMBER": "5678", + "GITHUB_EVENT_NAME": "push" + }, + // The context values that were referenced in the workflow definition. + // Secrets are set to the empty string. + "context": { + "github": { + "run_id": "abcd1234" + }, + "runner": { + "os": "Linux", + "temp": "/tmp/tmp.iizj8l0XhS", + } + } + } +} +"materials": [{ + // The git repo that contains the build.yaml referenced above. + "uri": "git+https://github.com/foo/bar.git", + // The resolved git commit hash reflecting the version of the repo used + // for this build. + "digest": {"sha1": "abc..."} +}] +``` + +
+ +### GitLab CI + +
+ +The GitLab CI team has implemented an [artifact attestation](https://docs.gitlab.com/ee/ci/runners/configure_runners.html#artifact-attestation) capability in their GitLab Runner 15.1 release. + +If GitLab is the one to generate provenance, and the runner is GitLab-hosted or self-hosted, +then the builder would be as follows: + +```jsonc +"builder": { + "id": "https://gitlab.com/foo/bar/-/runners/12345678" +} +``` + +
+ +#### GitLab CI Job + +
+ +```jsonc +"buildType": "https://gitlab.com/gitlab-org/gitlab-runner/-/blob/v15.1.0/PROVENANCE.md", +"invocation": { + "configSource": { + // the git repo that contains the GitLab CI job referenced in the entrypoint + "uri": "https://gitlab.com//foo/bar", + // The resolved git commit hash reflecting the version of the repo used + // for this build. + "digest": { + "sha256": "abc..." + }, + // the name of the CI job that triggered the build + "entryPoint": "build" + }, + // Other variables that are required to reproduce the build and that cannot be + // recomputed using existing information. (Documentation would explain how to + // recompute the rest of the fields.) + "environment": { + // Name of the GitLab runner + "name": "hosted-gitlab-runner", + // The runner executor + "executor": "kubernetes", + // The architecture on which the CI job is run + "architecture": "amd64" + }, + // Collection of all external inputs (CI variables) related to the job + "parameters": { + "CI_PIPELINE_ID": "", + "CI_PIPELINE_URL": "", + // All other CI variable names are listed here. Values are always represented as empty strings to avoid leaking secrets. + } +}, +"metadata": { + "buildStartedOn": "2022-06-17T00:47:27+03:00", + "buildFinishedOn": "2022-06-17T00:47:28+03:00", + "completeness": { + "parameters": true, + "environment": true, + "materials": false + }, + "reproducible": false +} +``` + +
+ +### Google Cloud Build + +
+ +WARNING: This is only for demonstration purposes. The Google Cloud Build team +has not yet reviewed or approved this design, and it is not yet implemented. +Details are subject to change! + +If Google is the one to generate provenance, and the worker is Google-hosted, +then the builder would be as follows: + +```json +"builder": { + "id": "https://cloudbuild.googleapis.com/GoogleHostedWorker@v1" +} +``` + +Custom worker: Not yet supported. We need to figure out a URI scheme that +represents what system hosted the worker, or perhaps add additional properties +in `builder`. + +
+ +#### Cloud Build config-as-code + +
+ +Here `entryPoint` references the `filename` from the CloudBuild +[BuildTrigger](https://cloud.google.com/build/docs/api/reference/rest/v1/projects.triggers). + +```jsonc +"buildType": "https://cloudbuild.googleapis.com/CloudBuildYaml@v1", +"invocation": { + // ... in the git repo described by `materials[0]` ... + "configSource": { + "entryPoint": "path/to/cloudbuild.yaml", + // The git repo that contains the cloudbuild.yaml referenced above. + "uri": "git+https://source.developers.google.com/p/foo/r/bar", + // The resolved git commit hash reflecting the version of the repo used + // for this build. + "digest": {"sha1": "abc..."} + }, + // The only possible user-defined parameters that can affect a BuildTrigger + // are the substitutions in the BuildTrigger. + "parameters": { + "substitutions": {...} + } +} +"buildConfig": { + // each step in the recipe corresponds to a step in the cloudbuild.yaml + // the format of this is determined by `buildType` + "steps": [ + { + "image": "pkg:docker/make@sha256:244fd47e07d1004f0aed9c", + "arguments": ["build"] + } + ] +} +"materials": [{ + // The git repo that contains the cloudbuild.yaml referenced above. + "uri": "git+https://source.developers.google.com/p/foo/r/bar", + // The resolved git commit hash reflecting the version of the repo used + // for this build. + "digest": {"sha1": "abc..."} +}] +``` + +
+ +#### Cloud Build RPC + +
+ +Here we list the steps defined in a trigger or over RPC: + +```jsonc +"buildType": "https://cloudbuild.googleapis.com/CloudBuildSteps@v1", +"invocation": { + // Build steps were provided as an argument. No `configSource` + "parameters": { + // The substitutions in the build trigger. + "substitutions": {...} + // TODO: Any other arguments? + } +} +"buildConfig": { + // The steps that were performed. (Format TBD.) + "steps": [...] +} +``` + +
+ +### Explicitly run commands + +
+ +WARNING: This is just a proof-of-concept. It is not yet standardized. + +Execution of arbitrary commands: + +```jsonc +"buildType": "https://example.com/ManuallyRunCommands@v1", +// There was no entry point, and the commands were run in an ad-hoc fashion. +// There is no `configSource`. +"invocation": null, +"buildConfig": { + // The list of commands that were executed. + "commands": [ + "tar xvf foo-1.2.3.tar.gz", + "cd foo-1.2.3", + "./configure --enable-some-feature", + "make foo.zip" + ], + // Indicates how to parse the strings in `commands`. + "shell": "bash" +} +``` + +
+ +## Migrating from 0.1 + +
+ +To migrate from [version 0.1][0.1] (`old`): + +```javascript +{ + "builder": old.builder, // (unchanged) + "buildType": old.recipe.type, + "invocation": { + "configSource": { + "uri": old.materials[old.recipe.definedInMaterial].uri, + "digest": old.materials[old.recipe.definedInMaterial].digest, + "entryPoint": old.recipe.entryPoint + }, + "parameters": old.recipe.arguments, + "environment": old.recipe.environment // (unchanged) + }, + "buildConfig": null, // no equivalent in 0.1 + "metadata": { + "buildInvocationId": old.metadata.buildInvocationId, // (unchanged) + "buildStartedOn": old.metadata.buildStartedOn, // (unchanged) + "buildFinishedOn": old.metadata.buildFinishedOn, // (unchanged) + "completeness": { + "parameters": old.metadata.completeness.arguments, + "environment": old.metadata.completeness.environment, // (unchanged) + "materials": old.metadata.completeness.materials, // (unchanged) + }, + "reproducible": old.metadata.reproducible // (unchanged) + }, + "materials": old.materials // optionally removing the configSource +} +``` + +
+ +## Change history + +
+ +- 0.2: Refactored to aid clarity and added `buildConfig`. The model is + unchanged. + - Replaced `definedInMaterial` and `entryPoint` with `configSource`. + - Renamed `recipe` to `invocation`. + - Moved `invocation.type` to top-level `buildType`. + - Renamed `arguments` to `parameters`. + - Added `buildConfig`, which can be used as an alternative to + `configSource` to validate the configuration. +- Renamed to "slsa.dev/provenance". +- 0.1.1: Added `metadata.buildInvocationId`. +- 0.1: Initial version, named "in-toto.io/Provenance" + +[0.1]: v0.1.md +[DigestSet]: https://github.com/in-toto/attestation/blob/main/spec/field_types.md#DigestSet +[GitHub Actions]: #github-actions +[Reproducible]: https://reproducible-builds.org +[ResourceURI]: https://github.com/in-toto/attestation/blob/main/spec/field_types.md#ResourceURI +[Statement]: https://github.com/in-toto/attestation/blob/main/spec/README.md#statement +[Timestamp]: https://github.com/in-toto/attestation/blob/main/spec/field_types.md#Timestamp +[TypeURI]: https://github.com/in-toto/attestation/blob/main/spec/field_types.md#TypeURI +[in-toto attestation]: https://github.com/in-toto/attestation +[parsing rules]: https://github.com/in-toto/attestation/blob/main/spec/README.md#parsing-rules +[provenance requirements]: ../spec/{{ site.current_spec_version }}/requirements#provenance-requirements diff --git a/docs/provenance/v1.0.proto b/docs/provenance/v1.0.proto new file mode 100644 index 000000000..ec8a5c68c --- /dev/null +++ b/docs/provenance/v1.0.proto @@ -0,0 +1,8 @@ +syntax = "proto3"; + +package slsa.v1_0; + +message Provenance { + What what = 1; + +} From 2a3eb61de6fc490c71142cc92c16697fed33a32e Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Fri, 21 Oct 2022 15:06:16 -0400 Subject: [PATCH 02/44] WIP: finished proto for v1.0 Signed-off-by: Mark Lodato --- docs/_data/provenance/v1_0.yaml | 82 +++++++++++++ docs/provenance/examples.md | 25 ++++ docs/provenance/v1.0.md | 207 ++------------------------------ 
docs/provenance/v1.0.proto | 186 +++++++++++++++++++++++++++- 4 files changed, 301 insertions(+), 199 deletions(-) create mode 100644 docs/_data/provenance/v1_0.yaml diff --git a/docs/_data/provenance/v1_0.yaml b/docs/_data/provenance/v1_0.yaml new file mode 100644 index 000000000..759b2e302 --- /dev/null +++ b/docs/_data/provenance/v1_0.yaml @@ -0,0 +1,82 @@ +# vim: set shiftwidth=4 softtabstop=4 expandtab: +# +#
+# +# ### Fields (automatic) +# +#
+# +# {% for property in site.data.provenance.v1_0.properties %} +#

{{property[0]}}

+# +#
{{property[1].description | markdownify}}
+# +# {% endfor %} +# +$schema: https://json-schema.org/draft/2020-12/schema +$id: https://slsa.dev/provenance/v1.0.json +title: Provenance +type: object +description: | + TODO + +properties: + buildDefinition: + type: object + description: | + The information necessary and sufficient to: + + 1. Run the build, provided understanding of the build type and + access to all referenced sources and dependencies. + 2. Verify that the build was as expected. + + To ensure that the definition contains sufficient information to build, it + is RECOMMENDED to architect the build system to use this definition as its + sole top-level input. + + To ensure that the definition is practical to verify against expectations, + it is RECOMMENDED to minimize the amount of information contained within + this message. When possible, information SHOULD be moved to the definition + of `type` (so that it is implicit in the build) or + + - MUST contain sufficient information for a downstream system to verify + that the build definition meets some pre-defined expectation. + + This MUST contain sufficient information for a consumer to verify that the + definition matches some pre-defined expectation. 
+ + This SHOULD contain sufficient information for the build system to (re)run + the build + + + Ideally this SHOULD be the necessary and sufficient information to re-run + the build + SHOULD be minimal + + - Complete: k + - Mininecessary + + properties: + type: + type: string + configSource: + type: object + properties: + digest: + uri: + entryPoint: + parameters: + type: object + buildDependencies: + type: array + items: + type: object + property: + uri: + type: string + entryPoint: + type: string + + instanceMetadata: + type: object + description: no diff --git a/docs/provenance/examples.md b/docs/provenance/examples.md index 777cb8c64..37b4a4af7 100644 --- a/docs/provenance/examples.md +++ b/docs/provenance/examples.md @@ -1,3 +1,28 @@ +## Docker Rebuilder + +```jsonc +"predicate": { + "buildDefinition": { + "type": "https://slsa.dev/docker-build/v0.1", + // Really want two things: + // - source + // - builder image + // In this model, which is which? + "configSource": { + "uri": "git+https://github.com/bcoe/slsa-on-github-test@refs/heads/main", + "digest": { "sha1": "deadbeef" } + } + "additionalSources": [{ + + }] + }, + "runDetails": { + } +} +``` + +## Tekton + ## GitHub Actions ```jsonc diff --git a/docs/provenance/v1.0.md b/docs/provenance/v1.0.md index 92fddc932..1aaf0f609 100644 --- a/docs/provenance/v1.0.md +++ b/docs/provenance/v1.0.md @@ -10,7 +10,11 @@ hero_text: To trace software back to the source and define the moving parts in a -Describe how an artifact or set of artifacts was produced. +Describe how an artifact or set of artifacts was produced so that: + +1. Consumers of the provenance can verify that the artifact was built according + to expectations. +2. Others can rebuild the artifact, if desired. This predicate is the recommended way to satisfy the SLSA [provenance requirements]. @@ -69,6 +73,7 @@ See [Example](#example) for a concrete example. "entryPoint": "" }, "parameters": { /* object */ }, + // TODO: arch, os, etc? 
"environment": { /* object */ }, // TODO: feels off "materials": [ { @@ -119,206 +124,16 @@ This predicate follows the in-toto attestation [parsing rules]. Summary: _NOTE: This section describes the fields within `predicate`. For a description of the other top-level fields, such as `subject`, see [Statement]._ - -`builder` _object, required_ - -> Identifies the entity that executed the invocation, which is trusted to have -> correctly performed the operation and populated this provenance. -> -> The identity MUST reflect the trust base that consumers care about. How -> detailed to be is a judgement call. For example, [GitHub Actions] supports -> both GitHub-hosted runners and self-hosted runners. The GitHub-hosted runner -> might be a single identity because it's all GitHub from the consumer's -> perspective. Meanwhile, each self-hosted runner might have its own identity -> because not all runners are trusted by all consumers. -> -> Consumers MUST accept only specific (signer, builder) pairs. For example, -> "GitHub" can sign provenance for the "GitHub Actions" builder, and "Google" -> can sign provenance for the "Google Cloud Build" builder, but "GitHub" cannot -> sign for the "Google Cloud Build" builder. -> -> Design rationale: The builder is distinct from the signer because one signer -> may generate attestations for more than one builder, as in the GitHub Actions -> example above. The field is required, even if it is implicit from the signer, -> to aid readability and debugging. It is an object to allow additional fields -> in the future, in case one URI is not sufficient. - - -`builder.id` _string ([TypeURI]), required_ - -> URI indicating the builder's identity. - - -`buildType` _string ([TypeURI]), required_ - -> URI indicating what type of build was performed. It determines the meaning of -> `invocation`, `buildConfig` and `materials`. - - -`invocation` _object, optional_ - -> Identifies the event that kicked off the build. 
When combined with -> `materials`, this SHOULD fully describe the build, such that re-running this -> invocation results in bit-for-bit identical output (if the build is -> [reproducible]). -> -> MAY be unset/null if unknown, but this is DISCOURAGED. - - -`invocation.configSource` _object, optional_ - -> Describes where the config file that kicked off the build came from. -> This is effectively a pointer to the source where `buildConfig` came from. - - -`invocation.configSource.uri` _string ([ResourceURI]), optional_ - -> URI indicating the identity of the source of the config. - - -`invocation.configSource.digest` _object ([DigestSet]), optional_ - -> Collection of cryptographic digests for the contents of the artifact -> specified by `invocation.configSource.uri`. - - -`invocation.configSource.entryPoint` _string, optional_ - -> String identifying the entry point into the build. This is often a path to a -> configuration file and/or a target label within that file. The syntax and -> meaning are defined by `buildType`. For example, if the `buildType` were -> "make", then this would reference the directory in which to run `make` as well -> as which target to use. -> -> Consumers SHOULD accept only specific `invocation.entryPoint` values. For example, -> a policy might only allow the "release" entry point but not the "debug" entry -> point. -> -> MAY be omitted if the `buildType` specifies a default value. -> -> Design rationale: The `entryPoint` is distinct from `parameters` to make it -> easier to write secure policies without having to parse `parameters`. - - -`invocation.parameters` _object, optional_ - -> Collection of all external inputs that influenced the build on top of -> `invocation.configSource`. For example, if the invocation -> type were "make", then this might be the flags passed to `make` aside from the -> target, which is captured in `invocation.configSource.entryPoint`. -> -> Consumers SHOULD accept only "safe" `invocation.parameters`. 
The simplest and -> safest way to achieve this is to disallow any `parameters` altogether. -> -> This is an arbitrary JSON object with a schema defined by `buildType`. -> -> This is considered to be incomplete unless `metadata.completeness.parameters` -> is true. - - -`invocation.environment` _object, optional_ - -> Any other builder-controlled inputs necessary for correctly evaluating the -> build. Usually only needed for [reproducing][reproducible] the build but not -> evaluated as part of policy. -> -> This SHOULD be minimized to only include things that are part of the public -> API, that cannot be recomputed from other values in the provenance, and that -> actually affect the evaluation of the build. For example, this might include -> variables that are referenced in the workflow definition, but it SHOULD NOT -> include a dump of all environment variables or include things like the -> hostname (assuming hostname is not part of the public API). -> -> This is an arbitrary JSON object with a schema defined by `buildType`. -> -> This is considered to be incomplete unless `metadata.completeness.environment` -> is true. - - -`metadata` _object, optional_ - -> Other properties of the build. - - -`metadata.buildInvocationId` _string, optional_ +### buildDefinition -> Identifies this particular build invocation, which can be useful for finding -> associated logs or other ad-hoc analysis. The exact meaning and format is -> defined by `builder.id`; by default it is treated as opaque and -> case-sensitive. The value SHOULD be globally unique. - - -`metadata.buildStartedOn` _string ([Timestamp]), optional_ - -> The timestamp of when the build started. - - -`metadata.buildFinishedOn` _string ([Timestamp]), optional_ - -> The timestamp of when the build completed. - - -`metadata.completeness` _object, optional_ - -> Indicates that the `builder` claims certain fields in this message to be -> complete. 
- - -`metadata.completeness.parameters` _boolean, optional_ - -> If true, the `builder` claims that `invocation.parameters` is complete, meaning -> that all external inputs are propertly captured in `invocation.parameters`. - - -`metadata.completeness.environment` _boolean, optional_ - -> If true, the `builder` claims that `invocation.environment` is complete. - - -`metadata.completeness.materials` _boolean, optional_ - -> If true, the `builder` claims that `materials` is complete, usually through -> some controls to prevent network access. Sometimes called "hermetic". - - -`metadata.reproducible` _boolean, optional_ - -> If true, the `builder` claims that running `invocation` on `materials` will -> produce bit-for-bit identical output. - - -`buildConfig` _object, optional_ - -> Lists the steps in the build. -> If `invocation.configSource` is not available, `buildConfig` can be used -> to verify information about the build. -> -> This is an arbitrary JSON object with a schema defined by `buildType`. - - -`materials` _array of objects, optional_ +#### buildDefinition.configSource -> The collection of artifacts that influenced the build including sources, -> dependencies, build tools, base images, and so on. -> -> This is considered to be incomplete unless `metadata.completeness.materials` -> is true. - - -`materials[*].uri` _string ([ResourceURI]), optional_ +##### buildDefinition.configSource.uri -> The method by which this artifact was referenced during the build. -> -> TODO: Should we differentiate between the "referenced" URI and the "resolved" -> URI, e.g. "latest" vs "3.4.1"? -> -> TODO: Should wrap in a `locator` object to allow for extensibility, in case we -> add other types of URIs or other non-URI locators? +##### buildDefinition.configSource.uri - -`materials[*].digest` _object ([DigestSet]), optional_ +### instanceMetadata -> Collection of cryptographic digests for the contents of this artifact.
diff --git a/docs/provenance/v1.0.proto b/docs/provenance/v1.0.proto index ec8a5c68c..04ee9de1d 100644 --- a/docs/provenance/v1.0.proto +++ b/docs/provenance/v1.0.proto @@ -1,8 +1,188 @@ syntax = "proto3"; -package slsa.v1_0; +package slsa.v1; + +import "google/protobuf/struct.proto"; + +// NOTE: We use lowerCamelCase for field names because that is how the JSON +// fields are named. The proto style guide advises use of snake_case in the +// .proto file but lowerCamelCase in JSON, but we believe that is confusing for +// readers (since we always serialize to JSON). It also makes it harder to +// reference other fields in comments since there are two ways to reference a +// field. message Provenance { - What what = 1; - + // The set of top-level inputs to the build. This SHOULD contain all the + // information necessary and sufficient to: + // + // 1. Run the build, provided understanding of the build type and access to + // all referenced sources and dependencies. + // 2. Verify that the build was as expected. + // + // The build system should be designed to minimize the amount of information + // necessary here, in order to reduce fragility and ease verification. + // Consumers MUST have an expectation of what "good" looks like; the more + // information that they must check, the harder that task becomes. + // + // Guidelines: + // + // - Maximize the amount of information that is implicit from the meaning of + // `type`. In particular, any value that is boilerplate and the same for + // every build SHOULD be implicit. + // + // - Avoid parameters by moving configuration to source artifacts whenever + // possible. For example, instead of passing in compiler flags via a + // parameter, require them to live next to the code or build + // configuration. + // + // - If possible, architect the build system to use this definition as its + // sole top-level input, in order to guarantee that the information is + // sufficient to run the build. 
+ // + // - Environment variables and artifacts that are not user controlled SHOULD + // be part of the `runDetails`, not here. For example, in GitHub Actions, + // a user can select which VM image but only from a set of pre-defined + // options; that enumeration may go here (though in practice it is part of + // the input artifact so it would not be recorded in the provenance) while + // the resolved VM version would go in `runDetails`. + BuildDefinition buildDefinition = 1; + + // Details about a specific execution of the build definition. Values here + // MUST NOT be user controlled. + RunDetails runDetails = 2; +} + +message BuildDefinition { + // URI indicating how to interpret and act upon this message. + string type = 1 [(subclass) = TypeURI]; + + // The primary top-level input artifact containing the configuration for this + // build. In many cases, this artifact also contains the top-level "code" + // (else it is listed in `resolvedDependencies` or `additionalSources`). + // + // TODO: The term "config source" isn't quite right for the Docker-based + // thing. What should we call it? Maybe also ask ActiveState. + // TODO: Explain how to handle the case when the configuration blob is not + // read directly from source. (Answer: hash it.) + Artifact configSource = 2; + + // Additional top-level input artifacts that were not referenced by + // `configSource`. Most builds do not need this. + // + // This is only used when the build takes multiple independent input + // artifacts, such as when a build has a separate configuration and source. + // + // TODO: Use Tekton/GCB as an example. + repeated Artifact additionalSources = 3; + + // Resolved dependencies needed at build time and referenced by + // `configSource`, `additionalSources`, or (transitively) `dependencies`. 
+ // For example, if the build script fetches and executes "example.com/foo.sh", + // which in turn fetches "example.com/bar.tar.gz", then both "foo.sh" and + // "bar.tar.gz" should be listed here. + // + // SHOULD NOT contain `configSource` or any `additionalSources`. + // + // TODO: Explain how to determine what goes here. + // TODO: Explain that it's OK for it to be incomplete. + // TODO: If the dep is already pinned, does it need to be listed here? + repeated Artifact resolvedDependencies = 4; + + // The identifier relative to `configSource` for the build definition. This is + // often a path to a configuration file and optional target label within that + // file. The syntax and interpretation are defined by `type`. + // + // This field SHOULD be omitted if it is implicit from `type`. This makes it + // the provenance to verify, thus it is preferred when possible. + // + // Verifiers SHOULD compare this to an expected value. + // + // Design rationale: The `entryPoint` is distinct from `parameters` to make it + // easier to verify. Most builds need an entry point but not extra parameters. + string entryPoint = 5; + + // TODO: Enumeration to select between profiles, e.g. debug vs optimized. Is + // this is a good idea? + string profile = 6; + + // TODO: os and architecture. Seems like we need that. How to record it? + + // Additional per-invocation parameters that were not covered by another + // field. + // + // TODO: finish explanation, particularly that it SHOULD be empty + google.protobuf.Struct additionalParameters = 7; +} + +message Artifact { + // TODO: Should we split out "locator" (ephemeral) from "identity" (static)? + string uri = 1 [(subclass) = URI]; + DigestSet digest = 2; + // TODO: add a "local name"? + // TODO: add mediaType? +} + +message DigestSet { + string sha256 = 1; + string sha512 = 2; + // ... 
+} + +message ParameterSet { + +} + +message RunDetails { + // TODO: The following fields are the same as v0.2: + Builder builder = 1; + + // TODO: description + BuildMetadata metadata = 2; + + // Properties of the build environment that were provided by the `builder` and + // not under user control. The primary intention of this field is for + // debugging, incident response, and vulnerability management. + // + // The values here MAY be required for reproducing the build, but ideally this + // would not be the case. For reproducibility, users should be instructed not + // to rely on these values during the build. + Environment environment = 5; + + // Additional artifacts generated during the build that should not be + // considered the "output" of the build but that may be needed during + // debugging or incident response. + // + // Possible use cases: + // + // - Logs generated during the build. + // - Fully evaluated build configuration. + // + // In most cases, this SHOULD NOT contain all intermediate files generated + // during the build. Instead, this should only contain files that are likely + // to be useful later and that cannot be easily reproduced. + repeated Artifact byproducts = 6; +} + +message Builder { + // TODO: In most cases this is implicit from the envelope layer (e.g. the + // public key or x.509 certificate), which is just one more thing to mess up. + // Can we rescope this to avoid the duplication and thus the security concern? + // For example, if the envelope identifies the build system, this might + // identify the tenant project? + string id = 1 [(subclass) = URI]; + + // TODO: Do we want to add this field? 
(#319) + string version = 2; +} + +message BuildMetadata { + // TODO: same as v0.2: + string invocationId = 1; + google.protobuf.Timestamp startedOn = 2; + google.protobuf.Timestamp finishedOn = 3; +} + +message Environment { + repeated Artifact systemArtifacts = 1; + google.protobuf.Struct system } From ad7e82356d078fcab61c260c10bacb6fda1f12f3 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Fri, 21 Oct 2022 16:00:29 -0400 Subject: [PATCH 03/44] WIP: inputArtifacts, parameters Signed-off-by: Mark Lodato --- docs/provenance/examples.md | 90 ++++++++++++++-------------- docs/provenance/v1.0.proto | 115 ++++++++++++++++++++---------------- 2 files changed, 108 insertions(+), 97 deletions(-) diff --git a/docs/provenance/examples.md b/docs/provenance/examples.md index 37b4a4af7..2d0a61305 100644 --- a/docs/provenance/examples.md +++ b/docs/provenance/examples.md @@ -1,23 +1,49 @@ -## Docker Rebuilder +## Docker Rebuilder -```jsonc +```json5 "predicate": { - "buildDefinition": { - "type": "https://slsa.dev/docker-build/v0.1", - // Really want two things: - // - source - // - builder image - // In this model, which is which? - "configSource": { - "uri": "git+https://github.com/bcoe/slsa-on-github-test@refs/heads/main", - "digest": { "sha1": "deadbeef" } + "buildDefinition": { + "type": "https://slsa.dev/docker-build/v0.1", + "inputArtifacts": { + "source": { + "uri": "git+https://github.com/bcoe/slsa-on-github-test@refs/heads/main", + "digest": { "sha1": "deadbeef" } + }, + "buildImage": { + "uri": "pkg:oci/builder-image?repository_url=gcr.io", + "digest": { "sha256": "53ca44..." } + } + }, + "resolvedDependencies": null, // not recorded for this builder + "parameters": { + "entryPoint": "path/to/config.file", + // If not a config file, it should go in additionalParams, I think. + "additionalParams": { + // Ideally the following are part of the config file: + "outputPath": "...", + "command": "...", + // anything else?...
+ } + } + }, + "runDetails": { + "builder": { + "id": "... something github whatever ..." + }, + "metadata": { + "invocationId": "...", + "startedOn": "...", + "finishedOn": "...", + }, + "environment": { + "systemArtifacts": { + "builder-binary": { + "uri": "git+https://github.com/slsa-framework/slsa-github-generator@refs/tags/v1.2.0", + "digest": { "sha1": "bdd89e60dc5387d8f819bebc702987956bcd4913" } + } + } + } } - "additionalSources": [{ - - }] - }, - "runDetails": { - } } ``` @@ -25,33 +51,3 @@ ## GitHub Actions -```jsonc -"buildDefinition": { - "type": "github-actions-workflow", - "configSource": { - "uri": "https://github.com/MarkLodato/myproject", - "digest": { "sha1": "..." }, - "entryPoint": ".github/workflows/build.yml" - }, - "parameters": { /* object */ }, - "materials": [ - { - "uri": "", - "digest": { /* DigestSet */ } - } - ] -}, -"instanceMetadata": { - "builder": { - "service": "github-actions", - "tenantProject": "", - }, - "invocationId": "", - "startedOn": "", - "finishedOn": "", - "environment": { /* object */ }, // TODO: feels off - "evaluatedConfig": { - "digest": { /* DigestSet */ } - } -}, -``` diff --git a/docs/provenance/v1.0.proto b/docs/provenance/v1.0.proto index 04ee9de1d..5eda101a8 100644 --- a/docs/provenance/v1.0.proto +++ b/docs/provenance/v1.0.proto @@ -16,7 +16,7 @@ message Provenance { // information necessary and sufficient to: // // 1. Run the build, provided understanding of the build type and access to - // all referenced sources and dependencies. + // all referenced input artifacts and dependencies. // 2. Verify that the build was as expected. // // The build system should be designed to minimize the amount of information @@ -30,9 +30,9 @@ message Provenance { // `type`. In particular, any value that is boilerplate and the same for // every build SHOULD be implicit. 
// - // - Avoid parameters by moving configuration to source artifacts whenever + // - Avoid parameters by moving configuration to input artifacts whenever // possible. For example, instead of passing in compiler flags via a - // parameter, require them to live next to the code or build + // parameter, require them to live next to the source code or build // configuration. // // - If possible, architect the build system to use this definition as its @@ -45,6 +45,9 @@ message Provenance { // options; that enumeration may go here (though in practice it is part of // the input artifact so it would not be recorded in the provenance) while // the resolved VM version would go in `runDetails`. + // + // The accuracy and completeness of this information is implied by + // `runDetails.builder.id`. BuildDefinition buildDefinition = 1; // Details about a specific execution of the build definition. Values here @@ -56,62 +59,45 @@ message BuildDefinition { // URI indicating how to interpret and act upon this message. string type = 1 [(subclass) = TypeURI]; - // The primary top-level input artifact containing the configuration for this - // build. In many cases, this artifact also contains the top-level "code" - // (else it is listed in `resolvedDependencies` or `additionalSources`). + // The top-level, independent input artifacts to the build. In most cases, + // this is a singular "source" to be built. The key is the name and the value + // is an Artifact. The interpretation of the key depends on `type`. // - // TODO: The term "config source" isn't quite right for the Docker-based - // thing. What should we call it? Maybe also ask ActiveState. - // TODO: Explain how to handle the case when the configuration blob is not - // read directly from source. (Answer: hash it.) - Artifact configSource = 2; - - // Additional top-level input artifacts that were not referenced by - // `configSource`. Most builds do not need this. 
+ // Conventional names: + // + // - `source`: The primary input to the build. This name SHOULD be used if + // there is only one possible input. + // - `config`: The build configuration, if independent from `source`. + // - `buildImage`: The container or VM image on which the build occurred, if + // not specified in `config`. // - // This is only used when the build takes multiple independent input - // artifacts, such as when a build has a separate configuration and source. + // In some cases, the build configuration is evaluated client-side and sent + // over the wire, such that the build system cannot determine its origin. In + // those cases, the build system SHOULD serialize the configuration in a + // deterministic way and record the `digest` without a `uri`. This allows one + // to consider the client-side evaluation as a separate "build" with its own + // provenance, such that the verifier can chain the two provenance + // attestations together to determine the origin of the configuration. // - // TODO: Use Tekton/GCB as an example. - repeated Artifact additionalSources = 3; + // TODO: Some requirement that the builder verifies the URI and that the + // verifier checks it against expectations? + map inputArtifacts = 2; // Resolved dependencies needed at build time and referenced by - // `configSource`, `additionalSources`, or (transitively) `dependencies`. - // For example, if the build script fetches and executes "example.com/foo.sh", - // which in turn fetches "example.com/bar.tar.gz", then both "foo.sh" and - // "bar.tar.gz" should be listed here. + // `inputArtifacts`, whether directly or transitively. For example, if the + // build script fetches and executes "example.com/foo.sh", which in turn + // fetches "example.com/bar.tar.gz", then both "foo.sh" and "bar.tar.gz" + // should be listed here. // - // SHOULD NOT contain `configSource` or any `additionalSources`. + // The `inputArtifacts` SHOULD NOT be repeated here. 
// // TODO: Explain how to determine what goes here. // TODO: Explain that it's OK for it to be incomplete. // TODO: If the dep is already pinned, does it need to be listed here? - repeated Artifact resolvedDependencies = 4; - - // The identifier relative to `configSource` for the build definition. This is - // often a path to a configuration file and optional target label within that - // file. The syntax and interpretation are defined by `type`. - // - // This field SHOULD be omitted if it is implicit from `type`. This makes it - // the provenance to verify, thus it is preferred when possible. - // - // Verifiers SHOULD compare this to an expected value. - // - // Design rationale: The `entryPoint` is distinct from `parameters` to make it - // easier to verify. Most builds need an entry point but not extra parameters. - string entryPoint = 5; - - // TODO: Enumeration to select between profiles, e.g. debug vs optimized. Is - // this is a good idea? - string profile = 6; - - // TODO: os and architecture. Seems like we need that. How to record it? + repeated Artifact resolvedDependencies = 3; - // Additional per-invocation parameters that were not covered by another - // field. - // - // TODO: finish explanation, particularly that it SHOULD be empty - google.protobuf.Struct additionalParameters = 7; + // The inputs to the build that are not artifacts. + ParameterSet parameters = 4; } message Artifact { @@ -129,7 +115,35 @@ message DigestSet { } message ParameterSet { + // The identifier that determines the specific build definition within + // `inputArtifacts`. This is often a path to a configuration file and optional + // target label within that file. The syntax and interpretation are defined by + // `type`. + // + // This field SHOULD be omitted if it is implicit from `type`. Being empty + // simplifies verification of the provenance, thus is preferred when possible. + // + // Verifiers MUST compare this to an expected value. 
+ // + // Design rationale: The `entryPoint` is distinct from `additionalParameters` + // to make it easier to verify. Most builds need an entry point but not extra + // parameters. + string entryPoint = 1; + // TODO: Enumeration to select between profiles, e.g. debug vs optimized. Is + // this is a good idea? + // TODO: Should this be a map or just a repeated string? + map enums = 2; + + // TODO: We probably need os and architecture. How to record it? Anything else + // in that category? + + // Additional per-invocation parameters that were not covered by another + // field. The schema and interpretation are defined by `type`. + // + // TODO: finish explanation, particularly that consumers SHOULD have a way to + // know what is expected, and thus it is simplest if it is empty + google.protobuf.Struct additionalParameters = 3; } message RunDetails { @@ -183,6 +197,7 @@ message BuildMetadata { } message Environment { - repeated Artifact systemArtifacts = 1; - google.protobuf.Struct system + // Artifacts provided by the builder. The key is a name. + map systemArtifacts = 1; + // TODO: environment variables? } From cdecf52997fd14d0206ee04b13e6f50cb9a0f985 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Tue, 25 Oct 2022 13:27:25 -0400 Subject: [PATCH 04/44] WIP Signed-off-by: Mark Lodato --- docs/provenance/v1.0.proto | 55 ++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 14 deletions(-) diff --git a/docs/provenance/v1.0.proto b/docs/provenance/v1.0.proto index 5eda101a8..a2d224c40 100644 --- a/docs/provenance/v1.0.proto +++ b/docs/provenance/v1.0.proto @@ -60,16 +60,10 @@ message BuildDefinition { string type = 1 [(subclass) = TypeURI]; // The top-level, independent input artifacts to the build. In most cases, - // this is a singular "source" to be built. The key is the name and the value - // is an Artifact. The interpretation of the key depends on `type`. + // this is a singular "source" to be built. 
// - // Conventional names: - // - // - `source`: The primary input to the build. This name SHOULD be used if - // there is only one possible input. - // - `config`: The build configuration, if independent from `source`. - // - `buildImage`: The container or VM image on which the build occurred, if - // not specified in `config`. + // Each value MUST have a `category`. If there is only one entry, its + // `category` SHOULD be "source". // // In some cases, the build configuration is evaluated client-side and sent // over the wire, such that the build system cannot determine its origin. In @@ -81,7 +75,7 @@ message BuildDefinition { // // TODO: Some requirement that the builder verifies the URI and that the // verifier checks it against expectations? - map inputArtifacts = 2; + repeated Artifact inputArtifacts = 2; // Resolved dependencies needed at build time and referenced by // `inputArtifacts`, whether directly or transitively. For example, if the @@ -89,6 +83,8 @@ message BuildDefinition { // fetches "example.com/bar.tar.gz", then both "foo.sh" and "bar.tar.gz" // should be listed here. // + // The `category` field is best effort and may be empty if unknown. + // // The `inputArtifacts` SHOULD NOT be repeated here. // // TODO: Explain how to determine what goes here. @@ -101,11 +97,42 @@ message BuildDefinition { } message Artifact { - // TODO: Should we split out "locator" (ephemeral) from "identity" (static)? - string uri = 1 [(subclass) = URI]; + // How this artifact affects the build. + // + // category | description + // ------------ | ----------- + // `source` | The primary input to the build. + // `config` | The build configuration, if different from `source`. + // `buildImage` | The container or VM image on which the build occurred. + // `include` | A dependency included in the output, such as a library. + // `tool` | A build dependency not included in the output, such as a compiler. + // + // TODO: What about dynamic linking / late binding? 
+ string category = 1 [(subclass) = Enum]; + + // Collection of cryptographic digests for the contents of this artifact. DigestSet digest = 2; - // TODO: add a "local name"? - // TODO: add mediaType? + + // The name for this artifact local to the build. + // + // Example: `PyYAML-6.0.tar.gz` + string localName = 3; + + // The canonical identifier for this artifact, such as a Package URL or + // version control URL. This value SHOULD be universal and stable, irrespective + // of the download or mirror location. + // + // Example: `pkg:pypi/pyyaml@6.0` + string canonicalId = 4 [(subclass) = URI]; + + // The location that this artifact was downloaded from, if not derivable from + // `canonicalId`. + // + // Example: `https://files.pythonhosted.org/packages/36/2b/61d51a2c4f25ef062ae3f74576b01638bebad5e045f747ff12643df63844/PyYAML-6.0.tar.gz` + string downloadLocation = 5 [(subclass) = URI]; + + // The Media Type (aka MIME type) of this artifact. + string mediaType = 6; } message DigestSet { From 2ef36594587d356d4884e20daea7e31ea42711ae Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Thu, 27 Oct 2022 16:38:59 -0400 Subject: [PATCH 05/44] WIP: apply feedback Signed-off-by: Mark Lodato --- docs/_data/provenance/v1_0.yaml | 82 -------------------------------- docs/provenance/extensions.proto | 9 ++++ docs/provenance/v1.0.proto | 35 +++++++++++--- 3 files changed, 37 insertions(+), 89 deletions(-) delete mode 100644 docs/_data/provenance/v1_0.yaml create mode 100644 docs/provenance/extensions.proto diff --git a/docs/_data/provenance/v1_0.yaml b/docs/_data/provenance/v1_0.yaml deleted file mode 100644 index 759b2e302..000000000 --- a/docs/_data/provenance/v1_0.yaml +++ /dev/null @@ -1,82 +0,0 @@ -# vim: set shiftwidth=4 softtabstop=4 expandtab: -# -#
-# -# ### Fields (automatic) -# -#
-# -# {% for property in site.data.provenance.v1_0.properties %} -#

{{property[0]}}

-# -#
{{property[1].description | markdownify}}
-# -# {% endfor %} -# -$schema: https://json-schema.org/draft/2020-12/schema -$id: https://slsa.dev/provenance/v1.0.json -title: Provenance -type: object -description: | - TODO - -properties: - buildDefinition: - type: object - description: | - The information necessary and sufficient to: - - 1. Run the build, provided understanding of the build type and - access to all referenced sources and dependencies. - 2. Verify that the build was as expected. - - To ensure that the definition contains sufficient information to build, it - is RECOMMENDED to architect the build system to use this definition as its - sole top-level input. - - To ensure that the definition is practical to verify against expectations, - it is RECOMMENDED to minimize the amount of information contained within - this message. When possible, information SHOULD be moved to the definition - of `type` (so that it is implicit in the build) or - - - MUST contain sufficient information for a downstream system to verify - that the build definition meets some pre-defined expectation. - - This MUST contain sufficient information for a consumer to verify that the - definiton matches some pre-defined expectation. 
- - This SHOULD contain sufficient information for the build system to (re)run - the build - - - Ideally this SHOULD be the necessary and sufficient information to re-run - the build - SHOULD be minimal - - - Complete: k - - Mininecessary - - properties: - type: - type: string - configSource: - type: object - properties: - digest: - uri: - entryPoint: - parameters: - type: object - buildDependencies: - type: array - items: - type: object - property: - uri: - type: string - entryPoint: - type: string - - instanceMetadata: - type: object - description: no diff --git a/docs/provenance/extensions.proto b/docs/provenance/extensions.proto new file mode 100644 index 000000000..4186fdfbf --- /dev/null +++ b/docs/provenance/extensions.proto @@ -0,0 +1,9 @@ +syntax = "proto2"; + +package slsa.v1; + +import "google/protobuf/descriptor.proto"; + +extend google.protobuf.FieldOptions { + optional string subclass = 50000; +} diff --git a/docs/provenance/v1.0.proto b/docs/provenance/v1.0.proto index a2d224c40..9501e6c31 100644 --- a/docs/provenance/v1.0.proto +++ b/docs/provenance/v1.0.proto @@ -2,7 +2,9 @@ syntax = "proto3"; package slsa.v1; +import "extensions.proto"; import "google/protobuf/struct.proto"; +import "google/protobuf/timestamp.proto"; // NOTE: We use lowerCamelCase for field names because that is how the JSON // fields are named. The proto style guide advises use of snake_case in the @@ -57,9 +59,9 @@ message Provenance { message BuildDefinition { // URI indicating how to interpret and act upon this message. - string type = 1 [(subclass) = TypeURI]; + string type = 1 [(subclass) = "TypeURI"]; - // The top-level, independent input artifacts to the build. In most cases, + // The top-level, independent input artifacts to the build. In many cases, // this is a singular "source" to be built. // // Each value MUST have a `category`. 
If there is only one entry, its @@ -75,6 +77,13 @@ message BuildDefinition { // // TODO: Some requirement that the builder verifies the URI and that the // verifier checks it against expectations? + // + // TODO: Idea from Shaun: should we have a tree hierarchy like lock files + // rather than a flat list? Then perhaps we'd only need one field. The + // "inputArtifacts" would be the top level of the hierarchy, and + // `resolvedDependencies` would be all the lower levels. This would also allow + // us to distinguish, if desired, between deps required by the build tool and + // deps required by the source. repeated Artifact inputArtifacts = 2; // Resolved dependencies needed at build time and referenced by @@ -103,12 +112,17 @@ message Artifact { // ------------ | ----------- // `source` | The primary input to the build. // `config` | The build configuration, if different from `source`. + // `builder` | The top-level binary that runs the build. // `buildImage` | The container or VM image on which the build occurred. // `include` | A dependency included in the output, such as a library. // `tool` | A build dependency not included in the output, such as a compiler. // // TODO: What about dynamic linking / late binding? - string category = 1 [(subclass) = Enum]; + // TODO: Should we get rid of include vs tool since there's no way for the + // builder to differentiate? + // TODO: Offer more guidance on what these look like. For example, is + // `builder` always expected? + string category = 1 [(subclass) = "Enum"]; // Collection of cryptographic digests for the contents of this artifact. DigestSet digest = 2; @@ -123,13 +137,13 @@ message Artifact { // of the download or mirror location. // // Example: `pkg:pypi/pyyaml@6.0` - string canonicalId = 4 [(subclass) = URI]; + string canonicalId = 4 [(subclass) = "URI"]; // The location that this artifact was downloaded from, if not derivable from // `canonicalId`.
// // Example: `https://files.pythonhosted.org/packages/36/2b/61d51a2c4f25ef062ae3f74576b01638bebad5e045f747ff12643df63844/PyYAML-6.0.tar.gz` - string downloadLocation = 5 [(subclass) = URI]; + string downloadLocation = 5 [(subclass) = "URI"]; // The Media Type (aka MIME type) of this artifact. string mediaType = 6; @@ -157,6 +171,9 @@ message ParameterSet { // parameters. string entryPoint = 1; + // Note: the purpose of this is to make it easier to meet the "parameterless" + // requirement by marking which parameters are "safe". But maybe it's not a + // good idea... // TODO: Enumeration to select between profiles, e.g. debug vs optimized. Is // this is a good idea? // TODO: Should this be a map or just a repeated string? @@ -187,6 +204,10 @@ message RunDetails { // The values here MAY be required for reproducing the build, but ideally this // would not be the case. For reproducibility, users should be instructed not // to rely on these values during the build. + // + // TODO: Move to buildDefinition based on feedback from Shaun. It's weird that + // it's here since it really is an input to the build. Need to figure out some + // way to talk about "security boundaries". Environment environment = 5; // Additional artifacts generated during the build that should not be @@ -210,7 +231,7 @@ message Builder { // Can we rescope this to avoid the duplication and thus the security concern? // For example, if the envelope identifies the build system, this might // identify the tenant project? - string id = 1 [(subclass) = URI]; + string id = 1 [(subclass) = "URI"]; // TODO: Do we want to add this field? (#319) string version = 2; @@ -225,6 +246,6 @@ message BuildMetadata { message Environment { // Artifacts provided by the builder. The key is a name. - map systemArtifacts = 1; + repeated Artifact systemArtifacts = 1; // TODO: environment variables? 
} From e7dd1e0226c34c205ca7bf94536dac6a905f29df Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Thu, 27 Oct 2022 16:44:51 -0400 Subject: [PATCH 06/44] WIP: replace proto extension with Markdown link Signed-off-by: Mark Lodato --- docs/provenance/extensions.proto | 9 --------- docs/provenance/v1.0.proto | 29 +++++++++++++++-------------- 2 files changed, 15 insertions(+), 23 deletions(-) delete mode 100644 docs/provenance/extensions.proto diff --git a/docs/provenance/extensions.proto b/docs/provenance/extensions.proto deleted file mode 100644 index 4186fdfbf..000000000 --- a/docs/provenance/extensions.proto +++ /dev/null @@ -1,9 +0,0 @@ -syntax = "proto2"; - -package slsa.v1; - -import "google/protobuf/descriptor.proto"; - -extend google.protobuf.FieldOptions { - optional string subclass = 50000; -} diff --git a/docs/provenance/v1.0.proto b/docs/provenance/v1.0.proto index 9501e6c31..90022bc54 100644 --- a/docs/provenance/v1.0.proto +++ b/docs/provenance/v1.0.proto @@ -2,7 +2,6 @@ syntax = "proto3"; package slsa.v1; -import "extensions.proto"; import "google/protobuf/struct.proto"; import "google/protobuf/timestamp.proto"; @@ -58,8 +57,8 @@ message Provenance { } message BuildDefinition { - // URI indicating how to interpret and act upon this message. - string type = 1 [(subclass) = "TypeURI"]; + // [TypeURI] indicating how to interpret and act upon this message. + string type = 1; // The top-level, independent input artifacts to the build. In many cases, // this is a singular "source" to be built. @@ -106,7 +105,7 @@ message BuildDefinition { } message Artifact { - // How this artifact affects the build. + // Enum describing how this artifact affects the build. // // category | description // ------------ | ----------- @@ -122,7 +121,8 @@ message Artifact { // builder to differentiate? // TODO: Offer more guidance on what these look like. For example, is // `builder` always expected? 
- string category = 1 [(subclass) = "Enum"]; + // TODO: Are other values allowed, beyond those listed above? + string category = 1; // Collection of cryptographic digests for the contents of this artifact. DigestSet digest = 2; @@ -132,20 +132,20 @@ message Artifact { // Example: `PyYAML-6.0.tar.gz` string localName = 3; - // The canonical identifier for this artifact, such as a Package URL or - // version control URL. This value SHOULD be universal and stable, irrelevant - // of the download or mirror location. + // [URI] that is the canonical identifier for this artifact, such as a Package + // URL or version control URL. This value SHOULD be universal and stable, + // irrelevant of the download or mirror location. // // Example: `pkg:pypi/pyyaml@6.0` - string canonicalId = 4 [(subclass) = "URI"]; + string canonicalId = 4; - // The location that this artifact was downloaded from, if not derivable from - // `canonicalId`. + // [URI] identifying the location that this artifact was downloaded from, if + // not derivable from `canonicalId`. // // Example: `https://files.pythonhosted.org/packages/36/2b/61d51a2c4f25ef062ae3f74576b01638bebad5e045f747ff12643df63844/PyYAML-6.0.tar.gz` - string downloadLocation = 5 [(subclass) = "URI"]; + string downloadLocation = 5; - // The Media Type (aka MIME type) of this artifact. + // [Media Type] (aka MIME type) of this artifact. string mediaType = 6; } @@ -226,12 +226,13 @@ message RunDetails { } message Builder { + // [URI] ... (same as v0.2) // TODO: In most cases this is implicit from the envelope layer (e.g. the // public key or x.509 certificate), which is just one more thing to mess up. // Can we rescope this to avoid the duplication and thus the security concern? // For example, if the envelope identifies the build system, this might // identify the tenant project? - string id = 1 [(subclass) = "URI"]; + string id = 1; // TODO: Do we want to add this field? 
(#319) string version = 2; From c3089dee73f7db0cba7de66ee27ea0ddf9414697 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Fri, 28 Oct 2022 14:25:10 -0400 Subject: [PATCH 07/44] Update example to use latest version of proto Signed-off-by: Mark Lodato --- docs/provenance/examples.md | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/docs/provenance/examples.md b/docs/provenance/examples.md index 2d0a61305..88e272fca 100644 --- a/docs/provenance/examples.md +++ b/docs/provenance/examples.md @@ -4,16 +4,19 @@ "predicate": { "buildDefinition": { "type": "https://slsa.dev/docker-build/v0.1", - "inputArtifacts": { - "source": { + "inputArtifacts": [ + { + "category": "source", "uri": "git+https://github.com/bcoe/slsa-on-github-test@refs/heads/main", "digest": { "sha1": "deadbeef" } }, - "buildImage": { + { + "category": "buildImage", "uri": "pkg:oci/builder-image?repository_url=gcr.io", "digest": { "sha256": "53ca44..." } } - }, + ] + , "resolvedDependencies": null, // not recorded for this builder "parameters": { "entryPoint": "path/to/config.file" @@ -36,8 +39,9 @@ "finishedOn": "...", }, "environment": { - "systemArtifacts": { - "builder-binary": { + "systemArtifacts": [ + { + "category": "builderBinary": { "uri": "git+https://github.com/slsa-framework/slsa-github-generator@refs/tags/v1.2.0", "digest": { "sha1": "bdd89e60dc5387d8f819bebc702987956bcd4913" } } From 25c78c31c9322040a4e5ea028fa57eb08e96587b Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Fri, 28 Oct 2022 15:55:00 -0400 Subject: [PATCH 08/44] artifacts: go back to map, uri Signed-off-by: Mark Lodato --- docs/provenance/examples.md | 16 ++++------- docs/provenance/v1.0.proto | 55 ++++++++++++++++--------------------- 2 files changed, 29 insertions(+), 42 deletions(-) diff --git a/docs/provenance/examples.md b/docs/provenance/examples.md index 88e272fca..2d0a61305 100644 --- a/docs/provenance/examples.md +++ b/docs/provenance/examples.md @@ -4,19 +4,16 @@ "predicate": { 
"buildDefinition": { "type": "https://slsa.dev/docker-build/v0.1", - "inputArtifacts": [ - { - "category": "source", + "inputArtifacts": { + "source": { "uri": "git+https://github.com/bcoe/slsa-on-github-test@refs/heads/main", "digest": { "sha1": "deadbeef" } }, - { - "category": "buildImage", + "buildImage": { "uri": "pkg:oci/builder-image?repository_url=gcr.io", "digest": { "sha256": "53ca44..." } } - ] - , + }, "resolvedDependencies": null, // not recorded for this builder "parameters": { "entryPoint": "path/to/config.file" @@ -39,9 +36,8 @@ "finishedOn": "...", }, "environment": { - "systemArtifacts": [ - { - "category": "builderBinary": { + "systemArtifacts": { + "builder-binary": { "uri": "git+https://github.com/slsa-framework/slsa-github-generator@refs/tags/v1.2.0", "digest": { "sha1": "bdd89e60dc5387d8f819bebc702987956bcd4913" } } diff --git a/docs/provenance/v1.0.proto b/docs/provenance/v1.0.proto index 90022bc54..1ae0d822d 100644 --- a/docs/provenance/v1.0.proto +++ b/docs/provenance/v1.0.proto @@ -63,8 +63,16 @@ message BuildDefinition { // The top-level, independent input artifacts to the build. In many cases, // this is a singular "source" to be built. // - // Each value MUST have a `category`. If there is only one entry, its - // `category` SHOULD be "source". + // The key is a name whose interpretations depends on `type`. If there is only + // one input, it SHOULD be named "source". The following conventional names + // are RECOMMENDED when appropriate: + // + // name | description + // ------------ | ----------- + // `source` | The primary input to the build. + // `config` | The build configuration, if different from `source`. + // `builder` | The top-level binary that runs the build. + // `buildImage` | The container or VM image in which the build occurred. // // In some cases, the build configuration is evaluated client-side and sent // over the wire, such that the build system cannot determine its origin. 
In @@ -83,7 +91,7 @@ message BuildDefinition { // `resolvedDependencies` would be all the lower levels. This would also allow // us to distinguish, if desired, between deps required by the build tool and // deps required by the source. - repeated Artifact inputArtifacts = 2; + map inputArtifacts = 2; // Resolved dependencies needed at build time and referenced by // `inputArtifacts`, whether directly or transitively. For example, if the @@ -105,24 +113,12 @@ message BuildDefinition { } message Artifact { - // Enum describing how this artifact affects the build. + // [URI] describing where this artifact came from. When possible, this SHOULD + // be a universal and stable identifier, such as a source location or Package + // URL. // - // category | description - // ------------ | ----------- - // `source` | The primary input to the build. - // `config` | The build configuration, if different from `source`. - // `builder` | The top-level binary that runs the build. - // `buildImage` | The container or VM image on which the build occurred. - // `include` | A dependency included in the output, such as a library. - // `tool` | A build dependency not included in the output, such as a compiler. - // - // TODO: What about dynamic linking / late binding? - // TODO: Should we get rid of include vs tool since there's no way for the - // builder to differentiate? - // TODO: Offer more guidance on what these look like. For example, is - // `builder` always expected? - // TODO: Are other values allowed, beyond those listed above? - string category = 1; + // Example: `pkg:pypi/pyyaml@6.0` + string uri = 1; // Collection of cryptographic digests for the contents of this artifact. DigestSet digest = 2; @@ -132,21 +128,14 @@ message Artifact { // Example: `PyYAML-6.0.tar.gz` string localName = 3; - // [URI] that is the canonical identifier for this artifact, such as a Package - // URL or version control URL. 
This value SHOULD be universal and stable, - // irrelevant of the download or mirror location. - // - // Example: `pkg:pypi/pyyaml@6.0` - string canonicalId = 4; - // [URI] identifying the location that this artifact was downloaded from, if - // not derivable from `canonicalId`. + // different and not derivable from `canonicalId`. // // Example: `https://files.pythonhosted.org/packages/36/2b/61d51a2c4f25ef062ae3f74576b01638bebad5e045f747ff12643df63844/PyYAML-6.0.tar.gz` - string downloadLocation = 5; + string downloadLocation = 4; // [Media Type] (aka MIME type) of this artifact. - string mediaType = 6; + string mediaType = 5; } message DigestSet { @@ -246,7 +235,9 @@ message BuildMetadata { } message Environment { - // Artifacts provided by the builder. The key is a name. - repeated Artifact systemArtifacts = 1; + // Artifacts provided by the builder. The key is a name, similar to + // `BuildDefinition.inputArtifacts`. + map systemArtifacts = 1; + // TODO: environment variables? } From 10151669d9e7396fc35f2dc3d43bab5e8f1ce411 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Mon, 31 Oct 2022 11:57:50 -0400 Subject: [PATCH 09/44] WIP: topLevelInputs and buildDependencies Move all top-level inputs to their own field so that it is clear what must be verified. Move resolvedDependencies outside since it's not top-level. Create new buildDependencies containing resolvedDependencies and environment. This way buildDefinition is complete. Revert field names to their v0.2 names, since the name change isn't worth the switch. 
Signed-off-by: Mark Lodato --- docs/provenance/examples.md | 88 +++++++++++------ docs/provenance/v1.0.proto | 191 ++++++++++++++++-------------------- 2 files changed, 145 insertions(+), 134 deletions(-) diff --git a/docs/provenance/examples.md b/docs/provenance/examples.md index 2d0a61305..2576177c8 100644 --- a/docs/provenance/examples.md +++ b/docs/provenance/examples.md @@ -1,30 +1,31 @@ ## Docker Rebuilder -```json5 +```json "predicate": { "buildDefinition": { - "type": "https://slsa.dev/docker-build/v0.1", - "inputArtifacts": { - "source": { - "uri": "git+https://github.com/bcoe/slsa-on-github-test@refs/heads/main", - "digest": { "sha1": "deadbeef" } + "topLevelInputs": { + "buildType": "https://slsa.dev/docker-build/v0.1", + "inputArtifacts": { + "source": { + "uri": "git+https://github.com/bcoe/slsa-on-github-test@refs/heads/main", + "digest": { "sha1": "deadbeef" } + }, + "buildImage": { + "uri": "pkg:oci/builder-image?repository_url=gcr.io", + "digest": { "sha256": "53ca44..." } + }, + "builderBinary": { + "uri": "git+https://github.com/slsa-framework/slsa-github-generator@refs/tags/v1.2.0", + "digest": { "sha1": "bdd89e60dc5387d8f819bebc702987956bcd4913" } + } }, - "buildImage": { - "uri": "pkg:oci/builder-image?repository_url=gcr.io", - "digest": { "sha256": "53ca44..." } - } - }, - "resolvedDependencies": null, // not recorded for this builder - "parameters": { - "entryPoint": "path/to/config.file" - // If not a config file, it should go in additionalParams, I think. - "additionalParams": { - // Ideally the following are part of the config file: + "entryPoint": "path/to/config.file", + "parameters": { "outputPath": "...", - "command": "...", - // anything else?... + "command": "..." } - } + }, + "buildDependencies": null }, "runDetails": { "builder": { @@ -33,21 +34,48 @@ "metadata": { "invocationId": "...", "startedOn": "...", - "finishedOn": "...", + "finishedOn": "..." 
}, - "environment": { - "systemArtifacts": { - "builder-binary": { - "uri": "git+https://github.com/slsa-framework/slsa-github-generator@refs/tags/v1.2.0", - "digest": { "sha1": "bdd89e60dc5387d8f819bebc702987956bcd4913" } - } - } - } + "byproducts": null } } ``` ## Tekton -## GitHub Actions +## GitHub Actions Builder + +```json +"predicate": { + "buildDefinition": { + "topLevelInputs": { + "buildType": "... github actions ...", + "inputArtifacts": { + "source": { + "uri": "...", + "digest": { "sha1": "deadbeef" } + }, + "builderBinary": { + "uri": "git+https://github.com/slsa-framework/slsa-github-generator@refs/tags/v1.2.0", + "digest": { "sha1": "bdd89e60dc5387d8f819bebc702987956bcd4913" } + } + }, + "entryPoint": ".github/workflow/build.yml", + "parameters": null + }, + "buildDependencies": null + }, + "runDetails": { + "builder": { + "id": "... something github whatever ..." + }, + "metadata": { + "invocationId": "...", + "startedOn": "...", + "finishedOn": "..." + }, + "byproducts": null + } +} +``` diff --git a/docs/provenance/v1.0.proto b/docs/provenance/v1.0.proto index 1ae0d822d..6d72479c3 100644 --- a/docs/provenance/v1.0.proto +++ b/docs/provenance/v1.0.proto @@ -5,14 +5,31 @@ package slsa.v1; import "google/protobuf/struct.proto"; import "google/protobuf/timestamp.proto"; -// NOTE: We use lowerCamelCase for field names because that is how the JSON -// fields are named. The proto style guide advises use of snake_case in the -// .proto file but lowerCamelCase in JSON, but we believe that is confusing for -// readers (since we always serialize to JSON). It also makes it harder to -// reference other fields in comments since there are two ways to reference a -// field. +// Design guidelines: +// +// - Use lowerCamelCase for field names because that is how the JSON fields +// are named. 
The proto style guide advises use of snake_case in the .proto +// file but lowerCamelCase in JSON, but we believe that is confusing for +// readers (since we always serialize to JSON). It also makes it harder to +// reference other fields in comments since there are two ways to reference +// a field. +// +// - Prefer field names that are unique throughout the file, even if somewhat +// redundant with the parent field. This makes it easier to reference in +// documentation and reduces confusion. message Provenance { + // The input to the build. + // + // The accuracy and completeness of this information is implied by + // `runDetails.builder.id`. + BuildDefinition buildDefinition = 1; + + // Details specific to this particular execution of the build. + RunDetails runDetails = 2; +} + +message BuildDefinition { // The set of top-level inputs to the build. This SHOULD contain all the // information necessary and sufficient to: // @@ -28,8 +45,8 @@ message Provenance { // Guidelines: // // - Maximize the amount of information that is implicit from the meaning of - // `type`. In particular, any value that is boilerplate and the same for - // every build SHOULD be implicit. + // `buildType`. In particular, any value that is boilerplate and the same + // for every build SHOULD be implicit. // // - Avoid parameters by moving configuration to input artifacts whenever // possible. For example, instead of passing in compiler flags via a @@ -39,40 +56,38 @@ message Provenance { // - If possible, architect the build system to use this definition as its // sole top-level input, in order to guarantee that the information is // sufficient to run the build. - // - // - Environment variables and artifacts that are not user controlled SHOULD - // be part of the `runDetails`, not here. 
For example, in GitHub Actions, - // a user can select which VM image but only from a set of pre-defined - // options; that enumeration may go here (though in practice it is part of - // the input artifact so it would not be recorded in the provenance) while - // the resolved VM version would go in `runDetails`. - // - // The accuracy and completeness of this information is implied by - // `runDetails.builder.id`. - BuildDefinition buildDefinition = 1; + BuildInputs topLevelInputs = 1; - // Details about a specific execution of the build definition. Values here - // MUST NOT be user controlled. - RunDetails runDetails = 2; + // Other information necessary to perform the build but either derived from + // `topLevelInputs` or provided by the build system. + // + // The values here SHOULD NOT be under external control unless they are + // constrained and unlikely to significantly affect the build. For example, + // the username who triggered the build might be listed here, rather than + // `topLevelInputs`, because it is a small input and unlikely to be affect the + // behavior of the output, aside from simple inclusion. + BuildDependencies buildDependencies = 2; } -message BuildDefinition { +message BuildInputs { // [TypeURI] indicating how to interpret and act upon this message. - string type = 1; + // + // TODO: Provide more guidance on how to define this. + string buildType = 1; // The top-level, independent input artifacts to the build. In many cases, // this is a singular "source" to be built. // - // The key is a name whose interpretations depends on `type`. If there is only - // one input, it SHOULD be named "source". The following conventional names - // are RECOMMENDED when appropriate: + // The key is a name whose interpretation depends on `buildType`. If there is + // only one input, it SHOULD be named "source". 
The following conventional + // names are RECOMMENDED when appropriate: // - // name | description - // ------------ | ----------- - // `source` | The primary input to the build. - // `config` | The build configuration, if different from `source`. - // `builder` | The top-level binary that runs the build. - // `buildImage` | The container or VM image in which the build occurred. + // name | description + // --------------- | ----------- + // `source` | The primary input to the build. + // `config` | The build configuration, if different from `source`. + // `builderBinary` | The top-level binary that runs the build. + // `buildImage` | The container or VM image in which the build occurred. // // In some cases, the build configuration is evaluated client-side and sent // over the wire, such that the build system cannot determine its origin. In @@ -84,32 +99,53 @@ message BuildDefinition { // // TODO: Some requirement that the builder verifies the URI and that the // verifier checks it against expectations? - // - // TODO: Idea from Shaun: should we have a tree heirarchy like lock files - // rather than a flat list? Then perhaps we'd only need one field. The - // "inputArtifacts" would be the top level of the hierarchy, and - // `resolvedDependencies` would be all the lower levels. This would also allow - // us to distinguish, if desired, between deps required by the build tool and - // deps required by the source. map inputArtifacts = 2; + // The identifier that determines the specific build definition within + // `inputArtifacts`. This is often a path to a configuration file and optional + // target label within that file. The syntax and interpretation are defined by + // `buildType`. + // + // This field SHOULD be omitted if it is implicit from `buildType`. Being + // empty simplifies verification of the provenance, thus is preferred when + // possible. + // + // Design rationale: The `entryPoint` is distinct from `parameters` to make it + // easier to verify. 
Most builds need an entry point but not extra parameters. + string entryPoint = 3; + + // TODO: We probably need os and architecture. How to record it? Anything else + // in that category? + + // Additional per-invocation parameters that were not covered by another + // field. The schema and interpretation are defined by `buildType`. + // + // TODO: finish explanation, particularly that consumers SHOULD have a way to + // know what is expected, and thus it is simplest if it is empty + google.protobuf.Struct parameters = 4; +} + +message BuildDependencies { // Resolved dependencies needed at build time and referenced by - // `inputArtifacts`, whether directly or transitively. For example, if the + // `buildDefinition`, whether directly or transitively. For example, if the // build script fetches and executes "example.com/foo.sh", which in turn // fetches "example.com/bar.tar.gz", then both "foo.sh" and "bar.tar.gz" // should be listed here. // - // The `category` field is best effort and may be empty if unknown. - // // The `inputArtifacts` SHOULD NOT be repeated here. // + // TODO: Explain what the purpose of this field is. Why do we need it? // TODO: Explain how to determine what goes here. // TODO: Explain that it's OK for it to be incomplete. // TODO: If the dep is already pinned, does it need to be listed here? - repeated Artifact resolvedDependencies = 3; - - // The inputs to the build that are not artifacts. - ParameterSet parameters = 4; + repeated Artifact resolvedDependencies = 1; + + // Parameters of the build environment that were provided by the `builder` and + // not under external control. The primary intention of this field is for + // debugging, incident response, and vulnerability management. The values here + // MAY be necessary for reproducing the build, but ideally this would not be + // the case. + google.protobuf.Struct environment = 2; } message Artifact { @@ -144,41 +180,6 @@ message DigestSet { // ... 
} -message ParameterSet { - // The identifier that determines the specific build definition within - // `inputArtifacts`. This is often a path to a configuration file and optional - // target label within that file. The syntax and interpretation are defined by - // `type`. - // - // This field SHOULD be omitted if it is implicit from `type`. Being empty - // simplifies verification of the provenance, thus is preferred when possible. - // - // Verifiers MUST compare this to an expected value. - // - // Design rationale: The `entryPoint` is distinct from `additionalParameters` - // to make it easier to verify. Most builds need an entry point but not extra - // parameters. - string entryPoint = 1; - - // Note: the purpose of this is to make it easier to meet the "parameterless" - // requirement by marking which parameters are "safe". But maybe it's not a - // good idea... - // TODO: Enumeration to select between profiles, e.g. debug vs optimized. Is - // this is a good idea? - // TODO: Should this be a map or just a repeated string? - map enums = 2; - - // TODO: We probably need os and architecture. How to record it? Anything else - // in that category? - - // Additional per-invocation parameters that were not covered by another - // field. The schema and interpretation are defined by `type`. - // - // TODO: finish explanation, particularly that consumers SHOULD have a way to - // know what is expected, and thus it is simplest if it is empty - google.protobuf.Struct additionalParameters = 3; -} - message RunDetails { // TODO: The following fields are the same as v0.2: Builder builder = 1; @@ -186,19 +187,6 @@ message RunDetails { // TODO: description BuildMetadata metadata = 2; - // Properties of the build environment that were provided by the `builder` and - // not under user control. The primary intention of this field is for - // debugging, incident response, and vulnerability management. 
- // - // The values here MAY be required for reproducing the build, but ideally this - // would not be the case. For reproducibility, users should be instructed not - // to rely on these values during the build. - // - // TODO: Move to buildDefinition based on feedback from Shaun. It's weird that - // it's here since it really is an input to the build. Need to figure out some - // way to talk about "security boundaries". - Environment environment = 5; - // Additional artifacts generated during the build that should not be // considered the "output" of the build but that may be needed during // debugging or incident response. @@ -211,7 +199,10 @@ message RunDetails { // In most cases, this SHOULD NOT contain all intermediate files generated // during the build. Instead, this should only contain files that are likely // to be useful later and that cannot be easily reproduced. - repeated Artifact byproducts = 6; + // + // TODO: Do we need some recommendation for how to distinguish between + // byproducts? For example, should we recommend using `localName`? + repeated Artifact byproducts = 3; } message Builder { @@ -233,11 +224,3 @@ message BuildMetadata { google.protobuf.Timestamp startedOn = 2; google.protobuf.Timestamp finishedOn = 3; } - -message Environment { - // Artifacts provided by the builder. The key is a name, similar to - // `BuildDefinition.inputArtifacts`. - map systemArtifacts = 1; - - // TODO: environment variables? 
-} From 861d84471aee6a79266b7bbff615a5e665592ccf Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Mon, 31 Oct 2022 13:34:55 -0400 Subject: [PATCH 10/44] WIP: make examples more realistic Signed-off-by: Mark Lodato --- docs/provenance/examples.md | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/docs/provenance/examples.md b/docs/provenance/examples.md index 2576177c8..e88b0bdd2 100644 --- a/docs/provenance/examples.md +++ b/docs/provenance/examples.md @@ -1,10 +1,10 @@ -## Docker Rebuilder +## Docker-based Rebuilder ```json "predicate": { "buildDefinition": { "topLevelInputs": { - "buildType": "https://slsa.dev/docker-build/v0.1", + "buildType": "https://slsa.dev/docker-based-build/v0.1", "inputArtifacts": { "source": { "uri": "git+https://github.com/bcoe/slsa-on-github-test@refs/heads/main", @@ -29,7 +29,7 @@ }, "runDetails": { "builder": { - "id": "... something github whatever ..." + "id": "..." }, "metadata": { "invocationId": "...", @@ -49,25 +49,44 @@ "predicate": { "buildDefinition": { "topLevelInputs": { - "buildType": "... 
github actions ...", + "buildType": "https://github.com/slsa-framework/slsa-github-generator/go@v1", "inputArtifacts": { "source": { - "uri": "...", - "digest": { "sha1": "deadbeef" } + "uri": "git+https://github.com/laurentsimon/slsa-verifier-test-gen@refs/heads/main", + "digest": { + "sha1": "15bf79ea9c89fffbf5dd02c6b5b686b291bfcbd2" + } }, "builderBinary": { "uri": "git+https://github.com/slsa-framework/slsa-github-generator@refs/tags/v1.2.0", "digest": { "sha1": "bdd89e60dc5387d8f819bebc702987956bcd4913" } } }, - "entryPoint": ".github/workflow/build.yml", + "entryPoint": ".github/workflow/release.yml", "parameters": null }, - "buildDependencies": null + "buildDependencies": { + "resolvedDependencies": [ + { + "uri": "git+https://github.com/laurentsimon/slsa-verifier-test-gen@refs/heads/main", + "digest": { + "sha1": "15bf79ea9c89fffbf5dd02c6b5b686b291bfcbd2" + } + }, + { + "uri": "https://github.com/actions/virtual-environments/releases/tag/ubuntu20/20220515.1" + } + ], + "environment": { + "github_actor": "...", + "github_event_name": "workflow_dispatch", + ... + } + } }, "runDetails": { "builder": { - "id": "... something github whatever ..." 
+ "id": "https://github.com/slsa-framework/slsa-github-generator/.github/workflows/builder_go_slsa3.yml@refs/tags/v0.0.1" }, "metadata": { "invocationId": "...", From f499fadc23dbc2aa1d8c6a81a3a3bbd73ca9bda1 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Mon, 31 Oct 2022 13:35:14 -0400 Subject: [PATCH 11/44] WIP: add cue file Signed-off-by: Mark Lodato --- docs/provenance/v1.0.cue | 42 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 docs/provenance/v1.0.cue diff --git a/docs/provenance/v1.0.cue b/docs/provenance/v1.0.cue new file mode 100644 index 000000000..8236511a4 --- /dev/null +++ b/docs/provenance/v1.0.cue @@ -0,0 +1,42 @@ +Provenance: { + "buildDefinition": { + "topLevelInputs": { + "buildType": string, + "inputArtifacts": {...Artifact}, + "entryPoint": string, + "additionalParameters": {...} + }, + "buildDependencies": { + "resolvedDependencies": [...Artifact], + "environment": {...} + } + }, + "runDetails": { + "builder": {, + "id": string, + "version": string + }, + "metadata": {, + "invocationId": string, + "startedOn": time.Time, + "finishedOn": time.Time + }, + "byproducts": [...Artifact] + } +} + +Artifact: { + "uri": string, + "digest": DigestSet, + "localName": string, + "downloadLocation": string, + "mediaType": string +} + +DigestSet: { + "sha256": string, + "sha512": string, + "sha1": string, + // TODO: list the other standard algorithms + ...string +} From cec378584e1b24c84fd0a60a0ae5f269e045f4c3 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Mon, 31 Oct 2022 14:06:36 -0400 Subject: [PATCH 12/44] WIP: add Tekton example and TODO Signed-off-by: Mark Lodato --- docs/provenance/examples.md | 42 +++++++++++++++++++++++++++++++++++++ docs/provenance/v1.0.proto | 2 ++ 2 files changed, 44 insertions(+) diff --git a/docs/provenance/examples.md b/docs/provenance/examples.md index e88b0bdd2..342647a35 100644 --- a/docs/provenance/examples.md +++ b/docs/provenance/examples.md @@ -43,6 +43,48 @@ ## Tekton 
+```json +"predicate": { + "buildDefinition": { + "topLevelInputs": { + "type": "https://tekton.dev/tekton-task/v0.1", + "inputArtifacts": { + "source": { + "uri": "git+https://github.com/tektoncd/catalog.git", + "digest": { "sha1": "962f9005350b1acb359558b6c5aafc420e0beb46" } + } + }, + "entryPoint": "path/to/sample_taskrun.yaml", + "parameters": null + }, + "buildDependencies": { + "resolvedDependencies": [ + { + "localName": "systemConfiguration", + "digest": { "sha256": "..." }, + "mediaType": "... some tekton config thing? ..." + } + ], + "environment": { + "additionalParams": { + } + } + } + }, + "runDetails": { + "builder": { + "id": "...whoever is running Tekton..." + }, + "metadata": { + "invocationId": "...", + "startedOn": "...", + "finishedOn": "..." + }, + "byproducts": null + } +} +``` + ## GitHub Actions Builder ```json diff --git a/docs/provenance/v1.0.proto b/docs/provenance/v1.0.proto index 6d72479c3..8fd430fc9 100644 --- a/docs/provenance/v1.0.proto +++ b/docs/provenance/v1.0.proto @@ -138,6 +138,8 @@ message BuildDependencies { // TODO: Explain how to determine what goes here. // TODO: Explain that it's OK for it to be incomplete. // TODO: If the dep is already pinned, does it need to be listed here? + // TODO: Should this be a map instead of an array? Then each MUST be named + // explicitly, which would be less ambiguous but more difficult. 
repeated Artifact resolvedDependencies = 1; // Parameters of the build environment that were provided by the `builder` and From 82a3c281f932bacb9006dc1fe4b9dae991498a3c Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Tue, 1 Nov 2022 08:24:09 -0400 Subject: [PATCH 13/44] WIP: merge everything into markdown file Signed-off-by: Mark Lodato --- docs/provenance/examples.md | 142 --------- docs/provenance/v1.0.cue | 72 ++--- docs/provenance/v1.0.md | 553 ++++++++++++++---------------------- 3 files changed, 254 insertions(+), 513 deletions(-) delete mode 100644 docs/provenance/examples.md diff --git a/docs/provenance/examples.md b/docs/provenance/examples.md deleted file mode 100644 index 342647a35..000000000 --- a/docs/provenance/examples.md +++ /dev/null @@ -1,142 +0,0 @@ -## Docker-based Rebuilder - -```json -"predicate": { - "buildDefinition": { - "topLevelInputs": { - "buildType": "https://slsa.dev/docker-based-build/v0.1", - "inputArtifacts": { - "source": { - "uri": "git+https://github.com/bcoe/slsa-on-github-test@refs/heads/main", - "digest": { "sha1": "deadbeef" } - }, - "buildImage": { - "uri": "pkg:oci/builder-image?repository_url=gcr.io", - "digest": { "sha256": "53ca44..." } - }, - "builderBinary": { - "uri": "git+https://github.com/slsa-framework/slsa-github-generator@refs/tags/v1.2.0", - "digest": { "sha1": "bdd89e60dc5387d8f819bebc702987956bcd4913" } - } - }, - "entryPoint": "path/to/config.file", - "parameters": { - "outputPath": "...", - "command": "..." - } - }, - "buildDependencies": null - }, - "runDetails": { - "builder": { - "id": "..." - }, - "metadata": { - "invocationId": "...", - "startedOn": "...", - "finishedOn": "..." 
- }, - "byproducts": null - } -} -``` - -## Tekton - -```json -"predicate": { - "buildDefinition": { - "topLevelInputs": { - "type": "https://tekton.dev/tekton-task/v0.1", - "inputArtifacts": { - "source": { - "uri": "git+https://github.com/tektoncd/catalog.git", - "digest": { "sha1": "962f9005350b1acb359558b6c5aafc420e0beb46" } - } - }, - "entryPoint": "path/to/sample_taskrun.yaml", - "parameters": null - }, - "buildDependencies": { - "resolvedDependencies": [ - { - "localName": "systemConfiguration", - "digest": { "sha256": "..." }, - "mediaType": "... some tekton config thing? ..." - } - ], - "environment": { - "additionalParams": { - } - } - } - }, - "runDetails": { - "builder": { - "id": "...whoever is running Tekton..." - }, - "metadata": { - "invocationId": "...", - "startedOn": "...", - "finishedOn": "..." - }, - "byproducts": null - } -} -``` - -## GitHub Actions Builder - -```json -"predicate": { - "buildDefinition": { - "topLevelInputs": { - "buildType": "https://github.com/slsa-framework/slsa-github-generator/go@v1", - "inputArtifacts": { - "source": { - "uri": "git+https://github.com/laurentsimon/slsa-verifier-test-gen@refs/heads/main", - "digest": { - "sha1": "15bf79ea9c89fffbf5dd02c6b5b686b291bfcbd2" - } - }, - "builderBinary": { - "uri": "git+https://github.com/slsa-framework/slsa-github-generator@refs/tags/v1.2.0", - "digest": { "sha1": "bdd89e60dc5387d8f819bebc702987956bcd4913" } - } - }, - "entryPoint": ".github/workflow/release.yml", - "parameters": null - }, - "buildDependencies": { - "resolvedDependencies": [ - { - "uri": "git+https://github.com/laurentsimon/slsa-verifier-test-gen@refs/heads/main", - "digest": { - "sha1": "15bf79ea9c89fffbf5dd02c6b5b686b291bfcbd2" - } - }, - { - "uri": "https://github.com/actions/virtual-environments/releases/tag/ubuntu20/20220515.1" - } - ], - "environment": { - "github_actor": "...", - "github_event_name": "workflow_dispatch", - ... 
- } - } - }, - "runDetails": { - "builder": { - "id": "https://github.com/slsa-framework/slsa-github-generator/.github/workflows/builder_go_slsa3.yml@refs/tags/v0.0.1" - }, - "metadata": { - "invocationId": "...", - "startedOn": "...", - "finishedOn": "..." - }, - "byproducts": null - } -} -``` - diff --git a/docs/provenance/v1.0.cue b/docs/provenance/v1.0.cue index 8236511a4..a7f9a7746 100644 --- a/docs/provenance/v1.0.cue +++ b/docs/provenance/v1.0.cue @@ -1,42 +1,50 @@ -Provenance: { - "buildDefinition": { - "topLevelInputs": { - "buildType": string, - "inputArtifacts": {...Artifact}, - "entryPoint": string, - "additionalParameters": {...} +{ + // Standard attestation fields: + "_type": "https://in-toto.io/Statement/v0.1", + "subject": [...], + + // Predicate: + "predicateType": "https://slsa.dev/provenance/v1.0-draft", + "predicate": { + "buildDefinition": { + "topLevelInputs": { + "buildType": string, + "inputArtifacts": { + [string]: #Artifact + }, + "entryPoint": string, + "parameters": {...} + }, + "buildDependencies": { + "resolvedDependencies": [...#Artifact], + "environment": {...} + } }, - "buildDependencies": { - "resolvedDependencies": [...Artifact], - "environment": {...} + "runDetails": { + "builder": { + "id": string, + "version": string + }, + "metadata": { + "invocationId": string, + "startedOn": string, // timestamp + "finishedOn": string // timestamp + }, + "byproducts": [...#Artifact] } - }, - "runDetails": { - "builder": {, - "id": string, - "version": string - }, - "metadata": {, - "invocationId": string, - "startedOn": time.Time, - "finishedOn": time.Time - }, - "byproducts": [...Artifact] } } -Artifact: { +#Artifact: { "uri": string, - "digest": DigestSet, + "digest": { + "sha256": string, + "sha512": string, + "sha1": string, + // TODO: list the other standard algorithms + [string]: string + }, "localName": string, "downloadLocation": string, "mediaType": string } - -DigestSet: { - "sha256": string, - "sha512": string, - "sha1": string, - 
// TODO: list the other standard algorithms - ...string -} diff --git a/docs/provenance/v1.0.md b/docs/provenance/v1.0.md index 1aaf0f609..29b13f2e1 100644 --- a/docs/provenance/v1.0.md +++ b/docs/provenance/v1.0.md @@ -34,21 +34,23 @@ and the larger [in-toto attestation] framework.
-Provenance is an attestation that some entity (`builder`) produced one or more -software artifacts (the `subject` of an in-toto attestation [Statement]) by -executing some `invocation`, using some other artifacts as input (`materials`). -The invocation in turn runs the `buildConfig`, which is a record of what was -executed. The builder is trusted to have faithfully recorded the provenance; -there is no option but to trust the builder. However, the builder may have -performed this operation at the request of some external, possibly untrusted -entity. These untrusted parameters are captured in the invocation's `parameters` -and some of the `materials`. Finally, the build may have depended on various -environmental parameters (`environment`) that are needed for -[reproducing][reproducible] the build but that are not under external control. - -See [Example](#example) for a concrete example. - -![Model Diagram](../images/provenance/v0.2/provenance.svg) +> **TODO:** Rewrite this intro. + +Provenance is an attestation that some entity, the **builder**, produced one or +more software artifacts (the `subject` of an in-toto attestation [Statement]) by +executing some `buildDefinition`. + +The builder is trusted to have faithfully recorded the provenance; there is no +option but to trust the builder. However, the builder may have performed this +operation at the request of some external, possibly untrusted entity. These +untrusted parameters are captured in the invocation's `parameters` and some of +the `materials`. Finally, the build may have depended on various environmental +parameters (`environment`) that are needed for [reproducing][reproducible] the +build but that are not under external control. + +See [Examples](#examples) for concrete examples. + +> **TODO:** Create a new diagram.
@@ -56,46 +58,11 @@ See [Example](#example) for a concrete example.
+> **TODO:** Get proper syntax highlighting for cue, and explain that this is a +> cue schema. + ```jsonc -{ - // Standard attestation fields: - "_type": "https://in-toto.io/Statement/v0.1", - "subject": [{ /*...*/ }], - - // Predicate: - "predicateType": "https://slsa.dev/provenance/v1.0", - "predicate": { - "buildDefinition": { - "type": "", - "configSource": { - "uri": "", - "digest": { /* DigestSet */ }, - "entryPoint": "" - }, - "parameters": { /* object */ }, - // TODO: arch, os, etc? - "environment": { /* object */ }, // TODO: feels off - "materials": [ - { - "uri": "", - "digest": { /* DigestSet */ } - } - ] - }, - "instanceMetadata": { - "builder": { - "service": "", - "tenantProject": "", - }, - "invocationId": "", - "startedOn": "", - "finishedOn": "", - "evaluatedConfig": { - "digest": { /* DigestSet */ } - } - }, - } -} +{% include_relative v1.0.cue %} ```
@@ -124,59 +91,73 @@ This predicate follows the in-toto attestation [parsing rules]. Summary: _NOTE: This section describes the fields within `predicate`. For a description of the other top-level fields, such as `subject`, see [Statement]._ -### buildDefinition +> **TODO:** Automatically parse the proto and render it directly here, rather +> than a simple inclusion of the raw schema file. -#### buildDefinition.configSource +```proto +{% include_relative v1.0.proto %} +``` -##### buildDefinition.configSource.uri +
-##### buildDefinition.configSource.uri +## Examples -### instanceMetadata +
+> **TODO:** Use the same source in all examples for consistency. -
+
-## Example +### Container-based build
-WARNING: This is just for demonstration purposes. +> **WARNING:** This is not yet finalized. -Suppose the builder downloaded `example-1.2.3.tar.gz`, extracted it, and ran -`make -C src foo CFLAGS=-O3`, resulting in a file with hash `5678...`. Then the -provenance might look like this: +> **TODO:** Move this to a separate file with a full description. ```jsonc -{ - "_type": "https://in-toto.io/Statement/v0.1", - // Output file; name is "_" to indicate "not important". - "subject": [{"name": "_", "digest": {"sha256": "5678..."}}], - "predicateType": "https://slsa.dev/provenance/v1.0", - "predicate": { - "buildType": "https://example.com/Makefile", - "builder": { "id": "mailto:person@example.com" }, - "invocation": { - "configSource": { - "uri": "https://example.com/example-1.2.3.tar.gz", - "digest": {"sha256": "1234..."}, - "entryPoint": "src:foo", // target "foo" in directory "src" - }, - "parameters": {"CFLAGS": "-O3"} // extra args to `make` +"predicate": { + "buildDefinition": { + "topLevelInputs": { + "buildType": "https://slsa.dev/container-based-build/v0.1-draft", + "inputArtifacts": { + // The thing to be built. + "source": { + "uri": "git+https://github.com/bcoe/slsa-on-github-test@refs/heads/main", + "digest": { "sha1": "deadbeef" } + }, + // The container image in which to build it. + "buildImage": { + "uri": "pkg:oci/builder-image?repository_url=gcr.io", + "digest": { "sha256": "53ca44..." } + }, + // The top-level binary that orchestrates the build within the + // container. + // TODO: Is that right? + "builderBinary": { + "uri": "git+https://github.com/slsa-framework/slsa-github-generator@refs/tags/v1.2.0", + "digest": { "sha1": "bdd89e60dc5387d8f819bebc702987956bcd4913" } + } + }, + // Some config file within the source that has more options. + "entryPoint": "path/to/config.file", + "parameters": { + // The path to pull the output from. + // TODO: Can we put this in the config file? + "outputPath": "...", + // The command to run within the container. 
+ // TODO: Can we put this in the config file? + "command": "..." + } + } }, - "materials": [{ - "uri": "https://example.com/example-1.2.3.tar.gz", - "digest": {"sha256": "1234..."} - }] - } + "runDetails": { + // details from whoever ran the build + } } ``` -
- -## More examples - -
@@ -184,79 +165,62 @@ provenance might look like this:
-WARNING: This is only for demonstration purposes. The GitHub Actions team has -not yet reviewed or approved this design, and it is not yet implemented. Details -are subject to change! - -If GitHub is the one to generate provenance, and the runner is GitHub-hosted, -then the builder would be as follows: - -```json -"builder": { - "id": "https://github.com/Attestations/GitHubHostedActions@v1" -} -``` - -Self-hosted runner: Not yet supported. We need to figure out a URI scheme that -represents what system hosted the runner, or perhaps add additional properties -in `builder`. - -
- -#### GitHub Actions Workflow - -
+#### [SLSA GitHub Generator](https://github.com/slsa-framework/slsa-github-generator) ```jsonc -"buildType": "https://github.com/Attestations/GitHubActionsWorkflow@v1", -"invocation": { - "configSource": { - "entryPoint": "build.yaml:build", - // The git repo that contains the build.yaml referenced in the entrypoint. - "uri": "git+https://github.com/foo/bar.git", - // The resolved git commit hash reflecting the version of the repo used - // for this build. - "digest": {"sha1": "abc..."} - }, - // The only possible user-defined parameters that can affect the build are the - // "inputs" to a workflow_dispatch event. This is unset/null for all other - // events. - "parameters": { - "inputs": { ... } - }, - // Other variables that are required to reproduce the build and that cannot be - // recomputed using existing information. (Documentation would explain how to - // recompute the rest of the fields.) - "environment": { - // The architecture of the runner. - "arch": "amd64", - // Environment variables. These are always set because it is not possible - // to know whether they were referenced or not. - "env": { - "GITHUB_RUN_ID": "1234", - "GITHUB_RUN_NUMBER": "5678", - "GITHUB_EVENT_NAME": "push" +"predicate": { + "buildDefinition": { + "topLevelInputs": { + "buildType": "https://github.com/slsa-framework/slsa-github-generator/go@v1", + "inputArtifacts": { + // The repo containing the top-level workflow. + "source": { + "uri": "git+https://github.com/laurentsimon/slsa-verifier-test-gen@refs/heads/main", + "digest": { + "sha1": "15bf79ea9c89fffbf5dd02c6b5b686b291bfcbd2" + } + }, + // The binary that runs under the reusable workflow. + // TODO: is this actually a top-level input, or is this really + // the `builder.id` and a `resolvedDependencies`? 
+ "builderBinary": { + "uri": "git+https://github.com/slsa-framework/slsa-github-generator@refs/tags/v1.2.0", + "digest": { "sha1": "bdd89e60dc5387d8f819bebc702987956bcd4913" } + } + }, + "entryPoint": ".github/workflows/release.yml", + "parameters": { + // For workflow_dispatch events, the `input` field if present. + "workflowDispatchInput": null + } + }, + "buildDependencies": { + "resolvedDependencies": [ + { + "uri": "https://github.com/actions/virtual-environments/releases/tag/ubuntu20/20220515.1" + } + ], + "environment": { + "github_actor": "...", + "github_event_name": "workflow_dispatch", + // ... and so on ... + } + } }, - // The context values that were referenced in the workflow definition. - // Secrets are set to the empty string. - "context": { - "github": { - "run_id": "abcd1234" - }, - "runner": { - "os": "Linux", - "temp": "/tmp/tmp.iizj8l0XhS", - } + "runDetails": { + "builder": { + "id": "https://github.com/slsa-framework/slsa-github-generator/.github/workflows/builder_go_slsa3.yml@refs/tags/v0.0.1", + "version": "" // TODO: supply a real example value here + }, + "metadata": { + // TODO: supply real example values here + "invocationId": "...", + "startedOn": "...", + "finishedOn": "..." + }, + "byproducts": null } - } } -"materials": [{ - // The git repo that contains the build.yaml referenced above. - "uri": "git+https://github.com/foo/bar.git", - // The resolved git commit hash reflecting the version of the repo used - // for this build. - "digest": {"sha1": "abc..."} -}] ```
@@ -267,64 +231,7 @@ in `builder`. The GitLab CI team has implemented an [artifact attestation](https://docs.gitlab.com/ee/ci/runners/configure_runners.html#artifact-attestation) capability in their GitLab Runner 15.1 release. -If GitLab is the one to generate provenance, and the runner is GitLab-hosted or self-hosted, -then the builder would be as follows: - -```jsonc -"builder": { - "id": "https://gitlab.com/foo/bar/-/runners/12345678" -} -``` - -
- -#### GitLab CI Job - -
- -```jsonc -"buildType": "https://gitlab.com/gitlab-org/gitlab-runner/-/blob/v15.1.0/PROVENANCE.md", -"invocation": { - "configSource": { - // the git repo that contains the GitLab CI job referenced in the entrypoint - "uri": "https://gitlab.com//foo/bar", - // The resolved git commit hash reflecting the version of the repo used - // for this build. - "digest": { - "sha256": "abc..." - }, - // the name of the CI job that triggered the build - "entryPoint": "build" - }, - // Other variables that are required to reproduce the build and that cannot be - // recomputed using existing information. (Documentation would explain how to - // recompute the rest of the fields.) - "environment": { - // Name of the GitLab runner - "name": "hosted-gitlab-runner", - // The runner executor - "executor": "kubernetes", - // The architecture on which the CI job is run - "architecture": "amd64" - }, - // Collection of all external inputs (CI variables) related to the job - "parameters": { - "CI_PIPELINE_ID": "", - "CI_PIPELINE_URL": "", - // All other CI variable names are listed here. Values are always represented as empty strings to avoid leaking secrets. - } -}, -"metadata": { - "buildStartedOn": "2022-06-17T00:47:27+03:00", - "buildFinishedOn": "2022-06-17T00:47:28+03:00", - "completeness": { - "parameters": true, - "environment": true, - "materials": false - }, - "reproducible": false -} -``` +> **TODO**
@@ -332,164 +239,131 @@ then the builder would be as follows:
-WARNING: This is only for demonstration purposes. The Google Cloud Build team -has not yet reviewed or approved this design, and it is not yet implemented. -Details are subject to change! - -If Google is the one to generate provenance, and the worker is Google-hosted, -then the builder would be as follows: - -```json -"builder": { - "id": "https://cloudbuild.googleapis.com/GoogleHostedWorker@v1" -} -``` - -Custom worker: Not yet supported. We need to figure out a URI scheme that -represents what system hosted the worker, or perhaps add additional properties -in `builder`. +> **TODO**
-#### Cloud Build config-as-code - -
+### Tekton -Here `entryPoint` references the `filename` from the CloudBuild -[BuildTrigger](https://cloud.google.com/build/docs/api/reference/rest/v1/projects.triggers). +#### Tekton task run ```jsonc -"buildType": "https://cloudbuild.googleapis.com/CloudBuildYaml@v1", -"invocation": { - // ... in the git repo described by `materials[0]` ... - "configSource": { - "entryPoint": "path/to/cloudbuild.yaml", - // The git repo that contains the cloudbuild.yaml referenced above. - "uri": "git+https://source.developers.google.com/p/foo/r/bar", - // The resolved git commit hash reflecting the version of the repo used - // for this build. - "digest": {"sha1": "abc..."} - }, - // The only possible user-defined parameters that can affect a BuildTrigger - // are the subtitutions in the BuildTrigger. - "parameters": { - "substitutions": {...} - } -} -"buildConfig": { - // each step in the recipe corresponds to a step in the cloudbuild.yaml - // the format of this is determined by `buildType` - "steps": [ - { - "image": "pkg:docker/make@sha256:244fd47e07d1004f0aed9c", - "arguments": ["build"] +"predicate": { + "buildDefinition": { + "topLevelInputs": { + "buildType": "https://tekton.dev/tekton-task/v0.1", + "inputArtifacts": { + "source": { + "uri": "git+https://github.com/tektoncd/catalog.git", + "digest": { "sha1": "962f9005350b1acb359558b6c5aafc420e0beb46" } + } + }, + "entryPoint": "path/to/sample_taskrun.yaml", + "parameters": null + }, + "buildDependencies": { + "resolvedDependencies": [ + { + // The SystemConfiguration that Tekton was configured with. + "localName": "systemConfiguration", + "digest": { "sha256": "..." }, + "mediaType": "... some tekton config thing? ..." + } + ], + "environment": null + } + }, + "runDetails": { + // Set by the organization running Tekton } - ] } -"materials": [{ - // The git repo that contains the cloudbuild.yaml referenced above.
- "uri": "git+https://source.developers.google.com/p/foo/r/bar", - // The resolved git commit hash reflecting the version of the repo used - // for this build. - "digest": {"sha1": "abc..."} -}] ``` -
- -#### Cloud Build RPC - -
- -Here we list the steps defined in a trigger or over RPC: - -```jsonc -"buildType": "https://cloudbuild.googleapis.com/CloudBuildSteps@v1", -"invocation": { - // Build steps were provided as an argument. No `configSource` - "parameters": { - // The substitutions in the build trigger. - "substitutions": {...} - // TODO: Any other arguments? - } -} -"buildConfig": { - // The steps that were performed. (Format TBD.) - "steps": [...] -} -``` - -
- ### Explicitly run commands
WARNING: This is just a proof-of-concept. It is not yet standardized. -Execution of arbitrary commands: - -```jsonc -"buildType": "https://example.com/ManuallyRunCommands@v1", -// There was no entry point, and the commands were run in an ad-hoc fashion. -// There is no `configSource`. -"invocation": null, -"buildConfig": { - // The list of commands that were executed. - "commands": [ - "tar xvf foo-1.2.3.tar.gz", - "cd foo-1.2.3", - "./configure --enable-some-feature", - "make foo.zip" - ], - // Indicates how to parse the strings in `commands`. - "shell": "bash" -} -``` +> **TODO**
-## Migrating from 0.1 +## Migrating from 0.2
-To migrate from [version 0.1][0.1] (`old`): +To migrate from [version 0.2][0.2] (`old`), use the following pseudocode. The +meaning of each field is unchanged unless otherwise noted. ```javascript { - "builder": old.builder, // (unchanged) - "buildType": old.recipe.type, - "invocation": { - "configSource": { - "uri": old.materials[old.recipe.definedInMaterial].uri, - "digest": old.materials[old.recipe.definedInMaterial].digest, - "entrypoint": old.recipe.entryPoint - }, - "parameters": old.recipe.arguments, - "environment": old.recipe.environment // (unchanged) - }, - "buildConfig": null, // no equivalent in 0.1 - "metadata": { - "buildInvocationId": old.metadata.buildInvocationId, // (unchanged) - "buildStartedOn": old.metadata.buildStartedOn, // (unchanged) - "buildFinishedOn": old.metadata.buildFinishedOn, // (unchanged) - "completeness": { - "parameters": old.metadata.completeness.arguments, - "environment": old.metadata.completeness.environment, // (unchanged) - "materials": old.metadata.completeness.materials, // (unchanged) + "buildDefinition": { + "topLevelInputs": { + // The `buildType` MUST be updated for v1.0 to describe how to + // interpret `inputArtifacts`. + "buildType": old.buildType, + "inputArtifacts": { + // OPTION 1: + // If the old `configSource` was the sole top-level input, + // (i.e. 
containing the source or a pointer to the source): + "source": { + "uri": old.invocation.configSource.uri, + "digest": old.invocation.configSource.digest, + }, + // OPTION 2: + // If the old `configSource` contained just build configuration + // and a separate top-level input contained the source: + "source": old.materials[indexOfSource], + "config": { + "uri": old.invocation.configSource.uri, + "digest": old.invocation.configSource.digest, + } + }, + "entryPoint": old.invocation.configSource.entryPoint, + "parameters": old.invocation.parameters, + }, + "buildDependencies": { + "resolvedDependencies": old.materials, + "environment": old.invocation.environment, + } }, - "reproducible": old.metadata.reproducible // (unchanged) - }, - "materials": old.materials // optionally removing the configSource + "runDetails": { + "builder": { + "id": old.builder.id, + "version": null // not in v0.2 + }, + "metadata": { + "invocationId": old.metadata.buildInvocationId, + "startedOn": old.metadata.buildStartedOn, + "finishedOn": old.metadata.buildFinishedOn, + }, + "byproducts": null // not in v0.2 + } } ``` +The following fields from v0.2 are no longer present in v1.0: + +* `buildConfig`: Replacement depends on the use case: + * If the build configuration is an independent input, hash it + deterministically and include it as `inputArtifacts.config`. + * Else if there is a known use case for the resolved build configuration, + hash it deterministically and include it in `byproducts`. + * Else omit it. +* `metadata.completeness`: Now implicit from `builder.id`. +* `metadata.reproducible`: Now implicit from `builder.id`. +
## Change history
+> **TODO:** Use headings for versions rather than bullets. + +- 1.0: Refactored to steer guide implementers to do the right thing. + - **TODO**: describe changes - 0.2: Refactored to aid clarity and added `buildConfig`. The model is unchanged. - Replaced `definedInMaterial` and `entryPoint` with `configSource`. @@ -503,6 +377,7 @@ To migrate from [version 0.1][0.1] (`old`): - 0.1: Initial version, named "in-toto.io/Provenance" [0.1]: v0.1.md +[0.2]: v0.2.md [DigestSet]: https://github.com/in-toto/attestation/blob/main/spec/field_types.md#DigestSet [GitHub Actions]: #github-actions [Reproducible]: https://reproducible-builds.org From df6bec95549be339b88c3b09ca9a26f02eb3bdd8 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Tue, 1 Nov 2022 15:57:26 -0400 Subject: [PATCH 14/44] WIP: remove extra divs Signed-off-by: Mark Lodato --- docs/provenance/v1.0.md | 58 ++--------------------------------------- 1 file changed, 2 insertions(+), 56 deletions(-) diff --git a/docs/provenance/v1.0.md b/docs/provenance/v1.0.md index 29b13f2e1..ba11d222a 100644 --- a/docs/provenance/v1.0.md +++ b/docs/provenance/v1.0.md @@ -4,12 +4,8 @@ layout: standard hero_text: To trace software back to the source and define the moving parts in a complex supply chain, provenance needs to be there from the very beginning. It’s the verifiable information about software artifacts describing where, when and how something was produced. For higher SLSA levels and more resilient integrity guarantees, provenance requirements are stricter and need a deeper, more technical understanding of the predicate. --- -
- ## Purpose -
- Describe how an artifact or set of artifacts was produced so that: 1. Consumers of the provenance can verify that the artifact was built according @@ -19,21 +15,13 @@ Describe how an artifact or set of artifacts was produced so that: This predicate is the recommended way to satisfy the SLSA [provenance requirements]. -
- ## Prerequisite -
- Understanding of SLSA [Software Attestations](/attestation-model) and the larger [in-toto attestation] framework. -
- ## Model -
- > **TODO:** Rewrite this intro. Provenance is an attestation that some entity, the **builder**, produced one or @@ -52,12 +40,8 @@ See [Examples](#examples) for concrete examples. > **TODO:** Create a new diagram. -
- ## Schema -
- > **TODO:** Get proper syntax highlighting for cue, and explain that this is a > cue schema. @@ -65,12 +49,8 @@ See [Examples](#examples) for concrete examples. {% include_relative v1.0.cue %} ``` -
- ### Parsing rules -
- This predicate follows the in-toto attestation [parsing rules]. Summary: - Consumers MUST ignore unrecognized fields. @@ -82,12 +62,8 @@ This predicate follows the in-toto attestation [parsing rules]. Summary: - Optional fields MAY be unset or null, and should be treated equivalently. Both are equivalent to empty for _object_ or _array_ values. -
- ### Fields -
- _NOTE: This section describes the fields within `predicate`. For a description of the other top-level fields, such as `subject`, see [Statement]._ @@ -98,20 +74,14 @@ of the other top-level fields, such as `subject`, see [Statement]._ {% include_relative v1.0.proto %} ``` -
- ## Examples -
- -> **TODO:** Use the same source in all examples for consistency. +> **TODO:** Use the same source repo in all examples for consistency. -
+> **TODO:** Automatically verify all examples against the cue schema. ### Container-based build -
- > **WARNING:** This is not yet finalized. > **TODO:** Move this to a separate file with a full description. @@ -159,12 +129,8 @@ of the other top-level fields, such as `subject`, see [Statement]._ ``` -
- ### GitHub Actions -
- #### [SLSA GitHub Generator](https://github.com/slsa-framework/slsa-github-generator) ```jsonc @@ -223,26 +189,16 @@ of the other top-level fields, such as `subject`, see [Statement]._ } ``` -
- ### GitLab CI -
- The GitLab CI team has implemented an [artifact attestation](https://docs.gitlab.com/ee/ci/runners/configure_runners.html#artifact-attestation) capability in their GitLab Runner 15.1 release. > **TODO** -
- ### Google Cloud Build -
- > **TODO** -
- ### Tekton #### Tekton task run @@ -281,18 +237,12 @@ The GitLab CI team has implemented an [artifact attestation](https://docs.gitlab ### Explicitly run commands -
- WARNING: This is just a proof-of-concept. It is not yet standardized. > **TODO** -
- ## Migrating from 0.2 -
- To migrate from [version 0.2][0.2] (`old`), use the following pseudocode. The meaning of each field is unchanged unless otherwise noted. @@ -354,12 +304,8 @@ The following fields from v0.2 are no longer present in v1.0: * `metadata.completeness`: Now implicit from `builder.id`. * `metadata.reproducible`: Now implicit from `builder.id`. -
- ## Change history -
- > **TODO:** Use headings for versions rather than bullets. - 1.0: Refactored to steer guide implementers to do the right thing. From 668f41a9a714691ba51b86b4ddb375e1413d5949 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Tue, 1 Nov 2022 17:28:37 -0400 Subject: [PATCH 15/44] WIP: rewrite intro Signed-off-by: Mark Lodato --- docs/provenance/v1.0.md | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/docs/provenance/v1.0.md b/docs/provenance/v1.0.md index ba11d222a..b0aa95885 100644 --- a/docs/provenance/v1.0.md +++ b/docs/provenance/v1.0.md @@ -22,23 +22,16 @@ and the larger [in-toto attestation] framework. ## Model -> **TODO:** Rewrite this intro. - -Provenance is an attestation that some entity, the **builder**, produced one or -more software artifacts (the `subject` of an in-toto attestation [Statement]) by -executing some `buildDefinition`. +Provenance is an attestation that the `builder` produced the software artifacts +`subject` through execution of the `buildDefinition`. The builder is trusted to have faithfully recorded the provenance; there is no option but to trust the builder. However, the builder may have performed this -operation at the request of some external, possibly untrusted entity. These -untrusted parameters are captured in the invocation's `parameters` and some of -the `materials`. Finally, the build may have depended on various environmental -parameters (`environment`) that are needed for [reproducing][reproducible] the -build but that are not under external control. +operation at the request of some external, possibly untrusted entity. See [Examples](#examples) for concrete examples. -> **TODO:** Create a new diagram. +> **TODO:** Align with the [Build model](../spec/v1.0/terminology.md). 
## Schema From 5a96d3a17a6c0bc42f845022b86d4403057a22c3 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Tue, 1 Nov 2022 17:39:35 -0400 Subject: [PATCH 16/44] WIP: rename Artifact to ArtifactReference Signed-off-by: Mark Lodato --- docs/provenance/v1.0.cue | 8 ++++---- docs/provenance/v1.0.proto | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/provenance/v1.0.cue b/docs/provenance/v1.0.cue index a7f9a7746..f2477ab64 100644 --- a/docs/provenance/v1.0.cue +++ b/docs/provenance/v1.0.cue @@ -10,13 +10,13 @@ "topLevelInputs": { "buildType": string, "inputArtifacts": { - [string]: #Artifact + [string]: #ArtifactReference }, "entryPoint": string, "parameters": {...} }, "buildDependencies": { - "resolvedDependencies": [...#Artifact], + "resolvedDependencies": [...#ArtifactReference], "environment": {...} } }, @@ -30,12 +30,12 @@ "startedOn": string, // timestamp "finishedOn": string // timestamp }, - "byproducts": [...#Artifact] + "byproducts": [...#ArtifactReference] } } } -#Artifact: { +#ArtifactReference: { "uri": string, "digest": { "sha256": string, diff --git a/docs/provenance/v1.0.proto b/docs/provenance/v1.0.proto index 8fd430fc9..06469a77a 100644 --- a/docs/provenance/v1.0.proto +++ b/docs/provenance/v1.0.proto @@ -99,7 +99,7 @@ message BuildInputs { // // TODO: Some requirement that the builder verifies the URI and that the // verifier checks it against expectations? - map inputArtifacts = 2; + map inputArtifacts = 2; // The identifier that determines the specific build definition within // `inputArtifacts`. This is often a path to a configuration file and optional @@ -140,7 +140,7 @@ message BuildDependencies { // TODO: If the dep is already pinned, does it need to be listed here? // TODO: Should this be a map instead of an array? Then each MUST be named // explicitly, which would be less ambiguous but more difficult. 
- repeated Artifact resolvedDependencies = 1; + repeated ArtifactReference resolvedDependencies = 1; // Parameters of the build environment that were provided by the `builder` and // not under external control. The primary intention of this field is for @@ -150,7 +150,7 @@ message BuildDependencies { google.protobuf.Struct environment = 2; } -message Artifact { +message ArtifactReference { // [URI] describing where this artifact came from. When possible, this SHOULD // be a universal and stable identifier, such as a source location or Package // URL. @@ -204,7 +204,7 @@ message RunDetails { // // TODO: Do we need some recommendation for how to distinguish between // byproducts? For example, should we recommend using `localName`? - repeated Artifact byproducts = 3; + repeated ArtifactReference byproducts = 3; } message Builder { From c397e76df9c2562301d3288007865073dcd9ef97 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Tue, 1 Nov 2022 17:42:02 -0400 Subject: [PATCH 17/44] Use headings in change history Signed-off-by: Mark Lodato --- docs/provenance/v1.0.md | 55 ++++++++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 15 deletions(-) diff --git a/docs/provenance/v1.0.md b/docs/provenance/v1.0.md index b0aa95885..cdb5b70ce 100644 --- a/docs/provenance/v1.0.md +++ b/docs/provenance/v1.0.md @@ -299,21 +299,46 @@ The following fields from v0.2 are no longer present in v1.0: ## Change history -> **TODO:** Use headings for versions rather than bullets. - -- 1.0: Refactored to steer guide implementers to do the right thing. - - **TODO**: describe changes -- 0.2: Refactored to aid clarity and added `buildConfig`. The model is - unchanged. - - Replaced `definedInMaterial` and `entryPoint` with `configSource`. - - Renamed `recipe` to `invocation`. - - Moved `invocation.type` to top-level `buildType`. - - Renamed `arguments` to `parameters`. - - Added `buildConfig`, which can be used as an alternative to - `configSource` to validate the configuration. 
-- Renamed to "slsa.dev/provenance". -- 0.1.1: Added `metadata.buildInvocationId`. -- 0.1: Initial version, named "in-toto.io/Provenance" +### v1.0 (DRAFT) + +Refactored to reduce the chance of mistakes in interpretation. The model has +changed slightly. + +- Grouped fields into `buildDefinition.topLevelInputs`, + `buildDefinition.buildDependencies`, and `runDetails` according to their use + case and security properties. +- Replaced `buildConfig` with `inputArtifacts` to allow for multiple top-level + input artifacts to be specified. +- Moved `entryPoint` out of `buildConfig` so that all artifact references + share a common schema. +- Added `builder.version`. +- Added `byproducts`. +- Removed `buildConfig`; can use `inputArtifacts.config` or `byproducts` + instead. +- Removed `completeness` and `reproducible`; now implied by `builder.id`. + +### v0.2 + +Refactored to aid clarity and added `buildConfig`. The model is unchanged. + +- Replaced `definedInMaterial` and `entryPoint` with `configSource`. +- Renamed `recipe` to `invocation`. +- Moved `invocation.type` to top-level `buildType`. +- Renamed `arguments` to `parameters`. +- Added `buildConfig`, which can be used as an alternative to `configSource` + to validate the configuration. + +### slsa.dev/provenance + +Renamed to "slsa.dev/provenance". + +### 0.1.1 + +- Added `metadata.buildInvocationId`. 
+ +### 0.1 + +Initial version, named "in-toto.io/Provenance" [0.1]: v0.1.md [0.2]: v0.2.md From f1adaf3bdb7387ae61f2f37699491387918c3e69 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Tue, 1 Nov 2022 17:43:53 -0400 Subject: [PATCH 18/44] Make draft URL work Signed-off-by: Mark Lodato --- docs/provenance/v1.0.cue | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/provenance/v1.0.cue b/docs/provenance/v1.0.cue index f2477ab64..8f7cf866b 100644 --- a/docs/provenance/v1.0.cue +++ b/docs/provenance/v1.0.cue @@ -4,10 +4,10 @@ "subject": [...], // Predicate: - "predicateType": "https://slsa.dev/provenance/v1.0-draft", + "predicateType": "https://slsa.dev/provenance/v1.0?draft", "predicate": { "buildDefinition": { - "topLevelInputs": { + "topLevelInputs": { "buildType": string, "inputArtifacts": { [string]: #ArtifactReference From f5a4b06c24edfc7257adf0b79ad4dd65357da795 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Tue, 1 Nov 2022 17:48:01 -0400 Subject: [PATCH 19/44] fix lint errors Signed-off-by: Mark Lodato --- docs/provenance/v1.0.md | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/docs/provenance/v1.0.md b/docs/provenance/v1.0.md index cdb5b70ce..0739fe9d8 100644 --- a/docs/provenance/v1.0.md +++ b/docs/provenance/v1.0.md @@ -121,7 +121,6 @@ of the other top-level fields, such as `subject`, see [Statement]._ } ``` - ### GitHub Actions #### [SLSA GitHub Generator](https://github.com/slsa-framework/slsa-github-generator) @@ -288,14 +287,14 @@ meaning of each field is unchanged unless otherwise noted. The following fields from v0.2 are no longer present in v1.0: -* `buildConfig`: Replacement depends on the use case: - * If the build configuration is an independent input, hash it +- `buildConfig`: Replacement depends on the use case: + - If the build configuration is an independent input, hash it deterministically and include it as `inputArtifacts.config`. 
- * Else if there is a known use case for the resolved build configuration, + - Else if there is a known use case for the resolved build configuration, hash it deterministically and include it in `byproducts`. - * Else omit it. -* `metadata.completeness`: Now implicit from `builder.id`. -* `metadata.reproducible`: Now implicit from `builder.id`. + - Else omit it. +- `metadata.completeness`: Now implicit from `builder.id`. +- `metadata.reproducible`: Now implicit from `builder.id`. ## Change history From b661ee17424f1838effec14707c9980394b1d4ee Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Fri, 4 Nov 2022 16:15:04 -0400 Subject: [PATCH 20/44] Address PR feedback Signed-off-by: Mark Lodato --- docs/provenance/v1.0.md | 38 ++++++++++++++++++++++---------------- docs/provenance/v1.0.proto | 4 ++-- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/docs/provenance/v1.0.md b/docs/provenance/v1.0.md index 0739fe9d8..eb8db10e3 100644 --- a/docs/provenance/v1.0.md +++ b/docs/provenance/v1.0.md @@ -8,9 +8,9 @@ hero_text: To trace software back to the source and define the moving parts in a Describe how an artifact or set of artifacts was produced so that: -1. Consumers of the provenance can verify that the artifact was built according +- Consumers of the provenance can verify that the artifact was built according to expectations. -2. Others can rebuild the artifact, if desired. +- Others can rebuild the artifact, if desired. This predicate is the recommended way to satisfy the SLSA [provenance requirements]. @@ -22,8 +22,8 @@ and the larger [in-toto attestation] framework. ## Model -Provenance is an attestation that the `builder` produced the software artifacts -`subject` through execution of the `buildDefinition`. +Provenance is an attestation that the `builder` produced the `subject` software +artifacts through execution of the `buildDefinition`. The builder is trusted to have faithfully recorded the provenance; there is no option but to trust the builder. 
However, the builder may have performed this @@ -33,6 +33,9 @@ See [Examples](#examples) for concrete examples. > **TODO:** Align with the [Build model](../spec/v1.0/terminology.md). +> **TODO:** Link to principles page in "no option to trust the builder" once +> [#528](https://github.com/slsa-framework/slsa/pull/528) lands. + ## Schema > **TODO:** Get proper syntax highlighting for cue, and explain that this is a @@ -73,9 +76,12 @@ of the other top-level fields, such as `subject`, see [Statement]._ > **TODO:** Automatically verify all examples against the cue schema. -### Container-based build +### Container-based reproducible build -> **WARNING:** This is not yet finalized. +> **WARNING:** This is not yet finalized. So far this is an idea under +> discussion to have the provenance document the input for reproducible builds, +> where the input is a container image + entry point. We will point to the +> actual documentation once the design is ready. > **TODO:** Move this to a separate file with a full description. @@ -199,7 +205,7 @@ The GitLab CI team has implemented an [artifact attestation](https://docs.gitlab "predicate": { "buildDefinition": { "topLevelInputs": { - "type": "https://tekton.dev/tekton-task/v0.1", + "buildType": "https://tekton.dev/tekton-task/v0.1", "inputArtifacts": { "source": { "uri": "git+https://github.com/tektoncd/catalog.git", @@ -244,7 +250,7 @@ meaning of each field is unchanged unless otherwise noted. "topLevelInputs": { // The `buildType` MUST be updated for v1.0 to describe how to // interpret `inputArtifacts`. - "buildType": old.buildType, + "buildType": (updated version of) old.buildType, "inputArtifacts": { // OPTION 1: // If the old `configSource` was the sole top-level input, @@ -287,11 +293,14 @@ meaning of each field is unchanged unless otherwise noted. 
The following fields from v0.2 are no longer present in v1.0: -- `buildConfig`: Replacement depends on the use case: - - If the build configuration is an independent input, hash it - deterministically and include it as `inputArtifacts.config`. - - Else if there is a known use case for the resolved build configuration, - hash it deterministically and include it in `byproducts`. +- `buildConfig`: Instead of inlining the resolved build configuration into the + provenance, either: + - If the configuration is a top-level input, record its digest in + `inputArtifacts["config"]`. + - Else if there is a known use case for knowing the exact resolved + build configuration, record its digest in `byproducts`. An example use + case might be someone who wishes to parse the configuration to look for + bad patterns, such as `curl | bash`. - Else omit it. - `metadata.completeness`: Now implicit from `builder.id`. - `metadata.reproducible`: Now implicit from `builder.id`. @@ -342,12 +351,9 @@ Initial version, named "in-toto.io/Provenance" [0.1]: v0.1.md [0.2]: v0.2.md [DigestSet]: https://github.com/in-toto/attestation/blob/main/spec/field_types.md#DigestSet -[GitHub Actions]: #github-actions -[Reproducible]: https://reproducible-builds.org [ResourceURI]: https://github.com/in-toto/attestation/blob/main/spec/field_types.md#ResourceURI [Statement]: https://github.com/in-toto/attestation/blob/main/spec/README.md#statement [Timestamp]: https://github.com/in-toto/attestation/blob/main/spec/field_types.md#Timestamp [TypeURI]: https://github.com/in-toto/attestation/blob/main/spec/field_types.md#TypeURI [in-toto attestation]: https://github.com/in-toto/attestation [parsing rules]: https://github.com/in-toto/attestation/blob/main/spec/README.md#parsing-rules -[provenance requirements]: ../spec/{{ site.current_spec_version }}/requirements#provenance-requirements diff --git a/docs/provenance/v1.0.proto b/docs/provenance/v1.0.proto index 06469a77a..1fc754219 100644 --- 
a/docs/provenance/v1.0.proto +++ b/docs/provenance/v1.0.proto @@ -153,7 +153,7 @@ message BuildDependencies { message ArtifactReference { // [URI] describing where this artifact came from. When possible, this SHOULD // be a universal and stable identifier, such as a source location or Package - // URL. + // URL ([PURL]). // // Example: `pkg:pypi/pyyaml@6.0` string uri = 1; @@ -167,7 +167,7 @@ message ArtifactReference { string localName = 3; // [URI] identifying the location that this artifact was downloaded from, if - // different and not derivable from `canonicalId`. + // different and not derivable from `uri`. // // Example: `https://files.pythonhosted.org/packages/36/2b/61d51a2c4f25ef062ae3f74576b01638bebad5e045f747ff12643df63844/PyYAML-6.0.tar.gz` string downloadLocation = 4; From 5aab59b27d44e465c449673de5b4d7d1ea6297ee Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Mon, 7 Nov 2022 13:18:45 -0500 Subject: [PATCH 21/44] Add builderDependencies Signed-off-by: Mark Lodato --- docs/provenance/v1.0.cue | 3 ++- docs/provenance/v1.0.md | 20 +++++++++++--------- docs/provenance/v1.0.proto | 6 ++++++ 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/docs/provenance/v1.0.cue b/docs/provenance/v1.0.cue index 8f7cf866b..3ba242073 100644 --- a/docs/provenance/v1.0.cue +++ b/docs/provenance/v1.0.cue @@ -23,7 +23,8 @@ "runDetails": { "builder": { "id": string, - "version": string + "version": string, + "builderDependencies": [...#ArtifactReference] }, "metadata": { "invocationId": string, diff --git a/docs/provenance/v1.0.md b/docs/provenance/v1.0.md index eb8db10e3..e07dc20ac 100644 --- a/docs/provenance/v1.0.md +++ b/docs/provenance/v1.0.md @@ -143,13 +143,6 @@ of the other top-level fields, such as `subject`, see [Statement]._ "digest": { "sha1": "15bf79ea9c89fffbf5dd02c6b5b686b291bfcbd2" } - }, - // The binary that runs under the reusable workflow. 
- // TODO: is this actually a top-level input, or is this really - // the `builder.id` and a `resolvedDependencies`? - "builderBinary": { - "uri": "git+https://github.com/slsa-framework/slsa-github-generator@refs/tags/v1.2.0", - "digest": { "sha1": "bdd89e60dc5387d8f819bebc702987956bcd4913" } } }, "entryPoint": ".github/workflow/release.yml", @@ -174,7 +167,15 @@ of the other top-level fields, such as `subject`, see [Statement]._ "runDetails": { "builder": { "id": "https://github.com/slsa-framework/slsa-github-generator/.github/workflows/builder_go_slsa3.yml@refs/tags/v0.0.1", - version: "" // TODO: supply a real example value here + version: "", // TODO: supply a real example value here + "builderDependencies": [ + { + // TODO: Do we need a field to differentiate this from other + // deps, e.g localName or something else? + "uri": "git+https://github.com/slsa-framework/slsa-github-generator@refs/tags/v1.2.0", + "digest": { "sha1": "bdd89e60dc5387d8f819bebc702987956bcd4913" } + } + ] }, "metadata": { // TODO: supply real example values here @@ -280,6 +281,7 @@ meaning of each field is unchanged unless otherwise noted. "builder": { "id": old.builder.id, "version": null // not in v0.2 + "builderDependencies": null // not in v0.2 }, "metadata": { "invocationId": old.metadata.buildInvocationId, @@ -319,7 +321,7 @@ changed slightly. input artifacts to be specified. - Moved `entryPoint` out of `buildConfig` so that all artifact references share a common schema. -- Added `builder.version`. +- Added `builder.version` and `builder.builderDependencies`. - Added `byproducts`. - Removed `buildConfig`; can use `inputArtifacts.config` or `byproducts` instead. diff --git a/docs/provenance/v1.0.proto b/docs/provenance/v1.0.proto index 1fc754219..07072ff8b 100644 --- a/docs/provenance/v1.0.proto +++ b/docs/provenance/v1.0.proto @@ -218,6 +218,12 @@ message Builder { // TODO: Do we want to add this field? 
(#319) string version = 2; + + // Dependencies used by the orchestrator that are not run within the workload + // and that should not affect the build, but may affect the provenance + // generation or security guarantees. + // TODO: Flesh out this model more. + repeated ArtifactReference builderDependencies = 3; } message BuildMetadata { From 78ae06f7f9c6a4b58b7b85f99960c1fa1ae95df2 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Wed, 9 Nov 2022 10:30:07 -0500 Subject: [PATCH 22/44] WIP: lowercase purl, move TODO Signed-off-by: Mark Lodato --- docs/provenance/v1.0.proto | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/provenance/v1.0.proto b/docs/provenance/v1.0.proto index 07072ff8b..9d85eb6e1 100644 --- a/docs/provenance/v1.0.proto +++ b/docs/provenance/v1.0.proto @@ -114,15 +114,15 @@ message BuildInputs { // easier to verify. Most builds need an entry point but not extra parameters. string entryPoint = 3; - // TODO: We probably need os and architecture. How to record it? Anything else - // in that category? - // Additional per-invocation parameters that were not covered by another // field. The schema and interpretation are defined by `buildType`. // // TODO: finish explanation, particularly that consumers SHOULD have a way to // know what is expected, and thus it is simplest if it is empty google.protobuf.Struct parameters = 4; + + // TODO: We probably need os and architecture. How to record it? Anything else + // in that category? } message BuildDependencies { @@ -153,7 +153,7 @@ message BuildDependencies { message ArtifactReference { // [URI] describing where this artifact came from. When possible, this SHOULD // be a universal and stable identifier, such as a source location or Package - // URL ([PURL]). + // URL ([purl]). 
// // Example: `pkg:pypi/pyyaml@6.0` string uri = 1; From 11459e62dcbc660c8e7acf99623a2c153fd45de4 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Fri, 11 Nov 2022 12:08:10 -0500 Subject: [PATCH 23/44] WIP Signed-off-by: Mark Lodato --- docs/provenance/v1.0.cue | 27 +++++++++----- docs/provenance/v1.0.md | 2 +- docs/provenance/v1.0.proto | 72 ++++++++++++++++++++++++++------------ 3 files changed, 70 insertions(+), 31 deletions(-) diff --git a/docs/provenance/v1.0.cue b/docs/provenance/v1.0.cue index 3ba242073..f5967568e 100644 --- a/docs/provenance/v1.0.cue +++ b/docs/provenance/v1.0.cue @@ -12,26 +12,35 @@ "inputArtifacts": { [string]: #ArtifactReference }, - "entryPoint": string, - "parameters": {...} + "parameters": { + [string]: object + } }, "buildDependencies": { - "resolvedDependencies": [...#ArtifactReference], - "environment": {...} + "resolvedDependencies": [ + ...#ArtifactReference + ], + "environment": { + [string]: object + } } }, "runDetails": { "builder": { "id": string, "version": string, - "builderDependencies": [...#ArtifactReference] + "builderDependencies": [ + ...#ArtifactReference + ] }, "metadata": { "invocationId": string, - "startedOn": string, // timestamp - "finishedOn": string // timestamp + "startedOn": #Timestamp, + "finishedOn": #Timestamp }, - "byproducts": [...#ArtifactReference] + "byproducts": [ + ...#ArtifactReference + ] } } } @@ -49,3 +58,5 @@ "downloadLocation": string, "mediaType": string } + +#Timestamp: string // --
T::Z diff --git a/docs/provenance/v1.0.md b/docs/provenance/v1.0.md index e07dc20ac..f5cb7df45 100644 --- a/docs/provenance/v1.0.md +++ b/docs/provenance/v1.0.md @@ -41,7 +41,7 @@ See [Examples](#examples) for concrete examples. > **TODO:** Get proper syntax highlighting for cue, and explain that this is a > cue schema. -```jsonc +```javascript {% include_relative v1.0.cue %} ``` diff --git a/docs/provenance/v1.0.proto b/docs/provenance/v1.0.proto index 9d85eb6e1..727e3374a 100644 --- a/docs/provenance/v1.0.proto +++ b/docs/provenance/v1.0.proto @@ -37,9 +37,9 @@ message BuildDefinition { // all referenced input artifacts and dependencies. // 2. Verify that the build was as expected. // - // The build system should be designed to minimize the amount of information + // The build system SHOULD be designed to minimize the amount of information // necessary here, in order to reduce fragility and ease verification. - // Consumers MUST have an expectation of what "good" looks like; the more + // Consumers SHOULD have an expectation of what "good" looks like; the more // information that they must check, the harder that task becomes. // // Guidelines: @@ -56,7 +56,7 @@ message BuildDefinition { // - If possible, architect the build system to use this definition as its // sole top-level input, in order to guarantee that the information is // sufficient to run the build. - BuildInputs topLevelInputs = 1; + TopLevelInputs topLevelInputs = 1; // Other information necessary to perform the build but either derived from // `topLevelInputs` or provided by the build system. @@ -69,14 +69,51 @@ message BuildDefinition { BuildDependencies buildDependencies = 2; } -message BuildInputs { - // [TypeURI] indicating how to interpret and act upon this message. +message TopLevelInputs { + // [TypeURI] indicating how to unambiguously interpret this message and + // initiate the build. 
This SHOULD resolve to a human-readable specification + // that lists: + // + // - Overall description of what this type means. + // - Schema for `inputArtifacts` and `parameters`. For each field, this + // SHOULD include: + // - name + // - description + // - type + // - required vs optional + // - any other restrictions + // - Explicit, unambiguous instructions for how to initiate the build given + // this message. + // + // Examples: + // - A "GitHub Actions Workflow" URI might mean to check out the git repo + // `inputArtifacts["source"]` and check out + // definition file pointed to by `parameters[' + // For example, an "NPM build" URI might mean: check out the source repo named + // inputArtifacts["source"], cd to that directory, install the version of + // Node listed in some config file, and then execute `npm run + // `. By designing the type this way, all of + // these implicit steps can be checked against a simple expectation of URI + // without the fragility of a long list of commands. + // + // The type is necessary to make sure that two different parties interpret the + // provenance in the same way. For example, + // + // 1. Because + // This is necessary because different build systems use different + // conventions. A human might easily figure out what is supposed to happen by + // machine + // + // TODO: Add examples to full-fledged definitions each with their own + // page. // // TODO: Provide more guidance on how to define this. + // + // REQUIRED for SLSA Build L1. string buildType = 1; - // The top-level, independent input artifacts to the build. In many cases, - // this is a singular "source" to be built. + // References to the top-level, independent input artifacts to the build. In + // many cases, this is a singular "source" artifact to be built. // // The key is a name whose interpretation depends on `buildType`. If there is // only one input, it SHOULD be named "source".
The following conventional @@ -99,26 +136,17 @@ message BuildInputs { // // TODO: Some requirement that the builder verifies the URI and that the // verifier checks it against expectations? - map inputArtifacts = 2; - - // The identifier that determines the specific build definition within - // `inputArtifacts`. This is often a path to a configuration file and optional - // target label within that file. The syntax and interpretation are defined by - // `buildType`. - // - // This field SHOULD be omitted if it is implicit from `buildType`. Being - // empty simplifies verification of the provenance, thus is preferred when - // possible. // - // Design rationale: The `entryPoint` is distinct from `parameters` to make it - // easier to verify. Most builds need an entry point but not extra parameters. - string entryPoint = 3; + // REQUIRED for SLSA Build L1. + map inputArtifacts = 2; - // Additional per-invocation parameters that were not covered by another - // field. The schema and interpretation are defined by `buildType`. + // All other input parameters necessary to invoke the build. The schema and + // interpretation are defined by `buildType`. // // TODO: finish explanation, particularly that consumers SHOULD have a way to // know what is expected, and thus it is simplest if it is empty + // + // REQUIRED for SLSA Build L1, though it may be empty. google.protobuf.Struct parameters = 4; // TODO: We probably need os and architecture. How to record it? 
Anything else From 5d1e791b7eefba356c4e56e268c0df0925507b56 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Mon, 14 Nov 2022 17:06:14 -0500 Subject: [PATCH 24/44] WIP: refactor - external vs system parameters Signed-off-by: Mark Lodato --- docs/provenance/v1.0.cue | 39 +++--- docs/provenance/v1.0.md | 225 ++++++++++++++++++++--------------- docs/provenance/v1.0.proto | 187 ++++++++++++++--------------- docs/provenance/v1_model.svg | 1 + 4 files changed, 234 insertions(+), 218 deletions(-) create mode 100644 docs/provenance/v1_model.svg diff --git a/docs/provenance/v1.0.cue b/docs/provenance/v1.0.cue index f5967568e..56c04978d 100644 --- a/docs/provenance/v1.0.cue +++ b/docs/provenance/v1.0.cue @@ -7,40 +7,29 @@ "predicateType": "https://slsa.dev/provenance/v1.0?draft", "predicate": { "buildDefinition": { - "topLevelInputs": { - "buildType": string, - "inputArtifacts": { - [string]: #ArtifactReference - }, - "parameters": { - [string]: object - } - }, - "buildDependencies": { - "resolvedDependencies": [ - ...#ArtifactReference - ], - "environment": { - [string]: object - } + "buildType": string, + "externalParameters": { + "artifacts": { [string]: #ArtifactReference }, + "values": { [string]: string }, + } + "systemParameters": { + "artifacts": { [string]: #ArtifactReference }, + "values": { [string]: string }, } + "resolvedDependencies": [ ...#ArtifactReference ], }, "runDetails": { "builder": { "id": string, "version": string, - "builderDependencies": [ - ...#ArtifactReference - ] + "builderDependencies": [ ...#ArtifactReference ], }, "metadata": { "invocationId": string, "startedOn": #Timestamp, - "finishedOn": #Timestamp + "finishedOn": #Timestamp, }, - "byproducts": [ - ...#ArtifactReference - ] + "byproducts": [ ...#ArtifactReference ], } } } @@ -52,11 +41,11 @@ "sha512": string, "sha1": string, // TODO: list the other standard algorithms - [string]: string + [string]: string, }, "localName": string, "downloadLocation": string, - "mediaType": string + 
"mediaType": string, } #Timestamp: string // --
T::Z diff --git a/docs/provenance/v1.0.md b/docs/provenance/v1.0.md index f5cb7df45..f1b92efcb 100644 --- a/docs/provenance/v1.0.md +++ b/docs/provenance/v1.0.md @@ -25,16 +25,48 @@ and the larger [in-toto attestation] framework. Provenance is an attestation that the `builder` produced the `subject` software artifacts through execution of the `buildDefinition`. -The builder is trusted to have faithfully recorded the provenance; there is no -option but to trust the builder. However, the builder may have performed this -operation at the request of some external, possibly untrusted entity. +![Build Model](v1_model.svg) -See [Examples](#examples) for concrete examples. +The model is as follows: + +- The build runs on a multi-tenant platform, where each execution is + independent. The `builder` is the identity of this platform, representing + the transitive closure of all entities that must be + [trusted](../spec/v1.0/principles.md#trust-systems-verify-artifacts) to + faithfully run the build and record the provenance. (Note: The same model + can be used for platform-less or single-tenant build systems.) + +- The build process is defined by a parameterized template, identified by + `buildType`. Often a build platform only supports a single template, e.g. + the GitHub Actions platform only supports executing a GitHub Actions + workflow file. + +- The external interface to the build is through `externalParameters`, which + captures the set of top-level (i.e. independent) inputs to the build. Some + of these parameters are references to artifacts while others are pure + values. For example, for GitHub Actions, this would be the source repository + (artifact reference) and path to the workflow file (value). + +- The build runs inside an environment initialized by the platform. The + `systemParameters` capture the top-level (i.e. independent) parameters to + this environment. These parameters are set internally by the platform, + though they may refer to external artifacts.
(The build platform may also + communicate with the build environment through some control plane, but this + is not captured in the provenance.) + +- Dependent artifacts may be fetched during initialization or execution of the + build process. The `resolvedDependencies` captures these dependencies, if + known. + +- Finally, the build process outputs one or more artifacts, identified by + `subject`. + +See [examples](#examples) for concrete examples. > **TODO:** Align with the [Build model](../spec/v1.0/terminology.md). -> **TODO:** Link to principles page in "no option to trust the builder" once -> [#528](https://github.com/slsa-framework/slsa/pull/528) lands. +> **TODO:** Limit the size of the diagram - it's too big on desktop (but fine on +> mobile). Also reimplement it in Figma so that others can edit it. ## Schema @@ -72,6 +104,9 @@ of the other top-level fields, such as `subject`, see [Statement]._ ## Examples +> **TODO:** Move each of these definitions to a separate file that defines +> `buildType`. + > **TODO:** Use the same source repo in all examples for consistency. > **TODO:** Automatically verify all examples against the cue schema. @@ -88,9 +123,9 @@ of the other top-level fields, such as `subject`, see [Statement]._ ```jsonc "predicate": { "buildDefinition": { - "topLevelInputs": { - "buildType": "https://slsa.dev/container-based-build/v0.1-draft", - "inputArtifacts": { + "buildType": "https://slsa.dev/container-based-build/v0.1?draft", + "externalParameters": { + "artifacts": { // The thing to be built. "source": { "uri": "git+https://github.com/bcoe/slsa-on-github-test@refs/heads/main", @@ -101,28 +136,25 @@ of the other top-level fields, such as `subject`, see [Statement]._ "uri": "pkg:oci/builder-image?repository_url=gcr.io", "digest": { "sha256": "53ca44..." } }, - // The top-level binary that orchestrates the build within the - // container. - // TODO: Is that right? 
- "builderBinary": { - "uri": "git+https://github.com/slsa-framework/slsa-github-generator@refs/tags/v1.2.0", - "digest": { "sha1": "bdd89e60dc5387d8f819bebc702987956bcd4913" } - } - }, - // Some config file within the source that has more options. - "entryPoint": "path/to/config.file", - "parameters": { - // The path to pull the output from. - // TODO: Can we put this in the config file? - "outputPath": "...", - // The command to run within the container. - // TODO: Can we put this in the config file? - "command": "..." + } + "values": { + "configFile": "path/to/config.file" } } + "systemParameters": null, + "resolvedDependencies": null, }, "runDetails": { - // details from whoever ran the build + "builder": { + "id": "..whoever ran the build...", + "builderDependencies": [ + { + "localName": "builderBinary", + "uri": "git+https://github.com/slsa-framework/slsa-github-generator@refs/tags/v1.2.0", + "digest": { "sha1": "bdd89e60dc5387d8f819bebc702987956bcd4913" } + } + ] + } } } ``` @@ -134,9 +166,9 @@ of the other top-level fields, such as `subject`, see [Statement]._ ```jsonc "predicate": { "buildDefinition": { - "topLevelInputs": { - "buildType": "https://github.com/slsa-framework/slsa-github-generator/go@v1", - "inputArtifacts": { + "buildType": "https://github.com/slsa-framework/slsa-github-generator/go@v1", + "externalParameters": { + "artifacts": { // The repo containing the top-level workflow. "source": { "uri": "git+https://github.com/laurentsimon/slsa-verifier-test-gen@refs/heads/main", @@ -145,33 +177,31 @@ of the other top-level fields, such as `subject`, see [Statement]._ } } }, - "entryPoint": ".github/workflow/release.yml", - "parameters": { + "values": { + "workflow": ".github/workflow/release.yml", // For workflow_dispatch events, the `input` field if present. 
- "workflowDispatchInput": null + "input": null } }, - "buildDependencies": { - "resolvedDependencies": [ - { - "uri": "https://github.com/actions/virtual-environments/releases/tag/ubuntu20/20220515.1" - } - ], - "environment": { + "systemParameters": { + "values": { "github_actor": "...", "github_event_name": "workflow_dispatch", // ... and so on ... } - } + }, + "resolvedDependencies": [ + { + "uri": "https://github.com/actions/virtual-environments/releases/tag/ubuntu20/20220515.1" + } + ] }, "runDetails": { "builder": { "id": "https://github.com/slsa-framework/slsa-github-generator/.github/workflows/builder_go_slsa3.yml@refs/tags/v0.0.1", - version: "", // TODO: supply a real example value here "builderDependencies": [ { - // TODO: Do we need a field to differentiate this from other - // deps, e.g localName or something else? + "localName": "builderBinary", "uri": "git+https://github.com/slsa-framework/slsa-github-generator@refs/tags/v1.2.0", "digest": { "sha1": "bdd89e60dc5387d8f819bebc702987956bcd4913" } } @@ -205,28 +235,27 @@ The GitLab CI team has implemented an [artifact attestation](https://docs.gitlab ```jsonc "predicate": { "buildDefinition": { - "topLevelInputs": { - "buildType": "https://tekton.dev/tekton-task/v0.1", - "inputArtifacts": { + "buildType": "https://tekton.dev/tekton-task/v0.1?draft", + "externalParameters": { + "artifacts": { "source": { "uri": "git+https://github.com/tektoncd/catalog.git", "digest": { "sha1": "962f9005350b1acb359558b6c5aafc420e0beb46" } } }, - "entryPoint": "path/to/sample_taskrun.yaml", - "parameters": null + "values": { + "workflow": "path/to/sample_taskrun.yaml", + } }, - "buildDependencies": { - "resolvedDependencies": [ - { - // The SystemConfiguration that Tekton was configured with. - "localName": "systemConfiguration", - "digest": { "sha256": "..." }, - "mediaType": "... some tekton config thing? ..." 
- } - ], - "environment": null - } + "systemParameters": null + "resolvedDependencies": [ + { + // The SystemConfiguration that Tekton was configured with. + "localName": "systemConfiguration", + "digest": { "sha256": "..." }, + "mediaType": "... some tekton config thing? ..." + } + ] }, "runDetails": { // Set by the organization running Tekton @@ -248,11 +277,11 @@ meaning of each field is unchanged unless otherwise noted. ```javascript { "buildDefinition": { - "topLevelInputs": { - // The `buildType` MUST be updated for v1.0 to describe how to - // interpret `inputArtifacts`. - "buildType": (updated version of) old.buildType, - "inputArtifacts": { + // The `buildType` MUST be updated for v1.0 to describe how to + // interpret `inputArtifacts`. + "buildType": /* updated version of */ old.buildType, + "externalParameters": { + "artifacts": { // OPTION 1: // If the old `configSource` was the sole top-level input, // (i.e. containing the source or a pointer to the source): @@ -267,38 +296,42 @@ meaning of each field is unchanged unless otherwise noted. "config": { "uri": old.invocation.configSource.uri, "digest": old.invocation.configSource.digest, - } + }, + }, + "values": old.invocation.parameters + { + // It is RECOMMENDED to rename "entryPoint" to something more + // descriptive. 
+ "entryPoint": old.invocation.configSource.entryPoint, }, - "entryPoint": old.invocation.configSource.entryPoint, - "parameters": old.invocation.parameters, }, - "buildDependencies": { - "resolvedDependencies": old.materials, - "environment": old.invocation.environment, - } + "systemParameters": { + "artifacts": null, // not in v0.2 + "values": old.invocation.environment, + }, + "resolvedDependencies": old.materials, }, "runDetails": { "builder": { "id": old.builder.id, - "version": null // not in v0.2 - "builderDependencies": null // not in v0.2 + "version": null, // not in v0.2 + "builderDependencies": null, // not in v0.2 }, "metadata": { "invocationId": old.metadata.buildInvocationId, "startedOn": old.metadata.buildStartedOn, "finishedOn": old.metadata.buildFinishedOn, }, - "byproducts": null // not in v0.2 - } + "byproducts": null, // not in v0.2 + }, } ``` The following fields from v0.2 are no longer present in v1.0: -- `buildConfig`: Instead of inlining the resolved build configuration into the - provenance, either: +- `entryPoint`: Use `externalParameters.values[]` instead. +- `buildConfig`: No longer inlined into the provenance. Instead, either: - If the configuration is a top-level input, record its digest in - `inputArtifacts["config"]`. + `externalParameters.artifacts["config"]`. - Else if there is a known use case for knowing the exact resolved build configuration, record its digest in `byproducts`. An example use case might be someone who wishes to parse the configuration to look for @@ -311,21 +344,27 @@ The following fields from v0.2 are no longer present in v1.0: ### v1.0 (DRAFT) -Refactored to reduce the chance of mistakes in interpretation. The model has -changed slightly. - -- Grouped fields into `buildDefinition.topLevelInputs`, - `buildDefinition.buildDependencies`, and `runDetails` according to their use - case and security properties. 
-- Replaced `buildConfig` with `inputArtifacts` to allow for multiple top-level - input artifacts to be specified. -- Moved `entryPoint` out of `buildConfig` so that all artifact references - share a common schema. -- Added `builder.version` and `builder.builderDependencies`. -- Added `byproducts`. -- Removed `buildConfig`; can use `inputArtifacts.config` or `byproducts` - instead. +Major refactor to reduce misinterpretation, including a minor change in model. + +- Significantly expanded all documentation. +- Altered the model slightly to better align with real-world build systems, + align with reproducible builds, and make verification easier. +- Grouped fields into `buildDefinition` vs `runDetails`. +- Further grouped fields into `externalParameters` vs `systemParameters`: + - Renamed `parameters` to `externalParameters.values`. + - Renamed `environment` to `systemParameters.values`. + - Removed `entryPoint`; now one of `externalParameters.values`. + - Removed `configSource`; now one of `externalParameters.artifacts`. +- Replaced `materials` with more specific artifact-reference fields: + `resolvedDependencies`, `externalParameters.artifacts`, + `systemParameters.artifacts`, and `builderDependencies`. +- Added `localName`, `downloadLocation`, and `mediaType` to artifact + references. +- Removed `buildConfig`; can be replaced with + `externalParameters.artifacts["config"]`, `byproducts`, or simply omitted. - Removed `completeness` and `reproducible`; now implied by `builder.id`. +- Added `builder.version`. +- Added `byproducts`. ### v0.2 diff --git a/docs/provenance/v1.0.proto b/docs/provenance/v1.0.proto index 727e3374a..4567919fb 100644 --- a/docs/provenance/v1.0.proto +++ b/docs/provenance/v1.0.proto @@ -23,19 +23,39 @@ message Provenance { // // The accuracy and completeness of this information is implied by // `runDetails.builder.id`. + // + // REQUIRED for SLSA Build L1. 
BuildDefinition buildDefinition = 1; // Details specific to this particular execution of the build. + // + // REQUIRED for SLSA Build L1. RunDetails runDetails = 2; } message BuildDefinition { - // The set of top-level inputs to the build. This SHOULD contain all the - // information necessary and sufficient to: + // [TypeURI] indicating how to unambiguously interpret this message and + // initiate the build. + // + // This SHOULD resolve to a human-readable specification that includes: + // + // - Overall description. + // - List of all parameters, including: + // - name + // - description + // - external vs system + // - artifact vs value + // - required vs optional + // - any other restrictions + // - Explicit, unambiguous instructions for how to initiate the build given + // this message. // - // 1. Run the build, provided understanding of the build type and access to - // all referenced input artifacts and dependencies. - // 2. Verify that the build was as expected. + // REQUIRED. + string buildType = 1; + + // The set of top-level external inputs to the build. This SHOULD contain all + // the information necessary and sufficient to initialize the build and begin + // execution. "Top-level" means that it is not derived from another input. // // The build system SHOULD be designed to minimize the amount of information // necessary here, in order to reduce fragility and ease verification. @@ -48,7 +68,7 @@ message BuildDefinition { // `buildType`. In particular, any value that is boilerplate and the same // for every build SHOULD be implicit. // - // - Avoid parameters by moving configuration to input artifacts whenever + // - Reduce parameters by moving configuration to input artifacts whenever // possible. For example, instead of passing in compiler flags via a // parameter, require them to live next to the source code or build // configuration. 
@@ -56,62 +76,41 @@ message BuildDefinition { // - If possible, architect the build system to use this definition as its // sole top-level input, in order to guarantee that the information is // sufficient to run the build. - TopLevelInputs topLevelInputs = 1; - - // Other information necessary to perform the build but either derived from - // `topLevelInputs` or provided by the build system. - // - // The values here SHOULD NOT be under external control unless they are - // constrained and unlikely to significantly affect the build. For example, - // the username who triggered the build might be listed here, rather than - // `topLevelInputs`, because it is a small input and unlikely to be affect the - // behavior of the output, aside from simple inclusion. - BuildDependencies buildDependencies = 2; -} - -message TopLevelInputs { - // [TypeURI] indicating how to unambiguously interpret this message and - // initiate the build. This SHOULD resolve to a human-readable specification - // that lists: // - // - Overall description of what this type means. - // - Schema for `inputArtifacts` and `parameters`. For each field, this - // SHOULD include: - // - name - // - description - // - type - // - required vs optional - // - any other restrictions - // - Explicit, unambiguous instructions for how to initiate the build given - // this message. + // TODO: Describe how complete this must be at each SLSA level. // - // Examples: - // - A "GitHub Actions Workflow" URI might mean to check out the git repo - // `inputArtifacts["source"]` and check out - // definition file pointed to by `parameters[' - // For example, an "NPM build" URI might mean: check out the source repo named - // inputArtifacts["source"], cd to that directory, install the version of - // Node listed in some config file, and then execute `npm run - // `. 
By designing the type this way, all of - // these implicit steps can be checked against a simple expectation of URI - // without the fragility of a long list of commands. - // - // The type is necessary to make sure that two different parties interpret the - // provenance in the same way. For example, + // REQUIRED for SLSA Build L1. + ParameterCollection externalParameters = 2; + + // Parameters of the build environment that were provided by the `builder` and + // not under external control. The primary intention of this field is for + // debugging, incident response, and vulnerability management. The values here + // MAY be necessary for reproducing the build. // - // 1. Beause - // This is necessary because different build systems use different - // conventions. A human might easily figure out what is supposed to happen by - // machine + // OPTIONAL. + ParameterCollection systemParameters = 3; + + // Resolved dependencies needed at build time. For example, if the build + // script fetches and executes "example.com/foo.sh", which in turn fetches + // "example.com/bar.tar.gz", then both "foo.sh" and "bar.tar.gz" should be + // listed here. // - // TODO: Add examples to full-fledged definitions each with their own - // page. + // Any artifacts listed under `externalParameters` or `systemParameters` + // SHOULD NOT be repeated here. // - // TODO: Provide more guidance on how to define this. + // TODO: Explain what the purpose of this field is. Why do we need it? + // TODO: Explain how to determine what goes here. + // TODO: Explain that it's OK for it to be incomplete. + // TODO: If the dep is already pinned, does it need to be listed here? + // TODO: Should this be a map instead of an array? Then each MUST be named + // explicitly, which would be less ambiguous but more difficult. // - // REQUIRED for SLSA Build L1. - string buildType = 1; + // OPTIONAL. 
+ repeated ArtifactReference resolvedDependencies = 1; +} + +message ParameterCollection { // References to the top-level, independent input artifacts to the build. In // many cases, this is a singular "source" artifact to be built. // @@ -126,6 +125,9 @@ message TopLevelInputs { // `builderBinary` | The top-level binary that runs the build. // `buildImage` | The container or VM image in which the build occurred. // + // IMPORTANT: Each name MUST be unique across `externalParameters`, + // `systemParameters`, `artifacts`, and `values`. + // // In some cases, the build configuration is evaluated client-side and sent // over the wire, such that the build system cannot determine its origin. In // those cases, the build system SHOULD serialize the configuration in a @@ -136,46 +138,19 @@ message TopLevelInputs { // // TODO: Some requirement that the builder verifies the URI and that the // verifier checks it against expectations? - // - // REQUIRED for SLSA Build L1. - map<string, ArtifactReference> inputArtifacts = 2; + // TODO: How can we best document this across externalParameters and + // systemParameters? + map<string, ArtifactReference> artifacts = 1; - // All other input parameters necessary to invoke the build. The schema and - // interpretation are defined by `buildType`. + // Other parameters that are not artifact references. Like `artifacts`, the + // key is a name whose interpretation depends on `buildType`. // - // TODO: finish explanation, particularly that consumers SHOULD have a way to - // know what is expected, and thus it is simplest if it is empty + // IMPORTANT: Each name MUST be unique across `externalParameters`, + // `systemParameters`, `artifacts`, and `values`. // - // REQUIRED for SLSA Build L1, though it may be empty. - google.protobuf.Struct parameters = 4; - - // TODO: We probably need os and architecture. How to record it? Anything else - // in that category?
-} - -message BuildDependencies { - // Resolved dependencies needed at build time and referenced by - // `buildDefinition`, whether directly or transitively. For example, if the - // build script fetches and executes "example.com/foo.sh", which in turn - // fetches "example.com/bar.tar.gz", then both "foo.sh" and "bar.tar.gz" - // should be listed here. - // - // The `inputArtifacts` SHOULD NOT be repeated here. - // - // TODO: Explain what the purpose of this field is. Why do we need it? - // TODO: Explain how to determine what goes here. - // TODO: Explain that it's OK for it to be incomplete. - // TODO: If the dep is already pinned, does it need to be listed here? - // TODO: Should this be a map instead of an array? Then each MUST be named - // explicitly, which would be less ambiguous but more difficult. - repeated ArtifactReference resolvedDependencies = 1; - - // Parameters of the build environment that were provided by the `builder` and - // not under external control. The primary intention of this field is for - // debugging, incident response, and vulnerability management. The values here - // MAY be necessary for reproducing the build, but ideally this would not be - // the case. - google.protobuf.Struct environment = 2; + // For simplicity, only string values are supported. If a non-string value is + // used, serialize it in a deterministic way to a string. + map<string, string> values = 2; } message ArtifactReference { @@ -186,8 +161,8 @@ message ArtifactReference { // Example: `pkg:pypi/pyyaml@6.0` string uri = 1; - // Collection of cryptographic digests for the contents of this artifact. - DigestSet digest = 2; + // [DigestSet] of cryptographic digests for the contents of this artifact. + map<string, string> digest = 2; // The name for this artifact local to the build. // @@ -204,17 +179,15 @@ message ArtifactReference { string mediaType = 5; } -message DigestSet { - string sha256 = 1; - string sha512 = 2; - // ...
-} - message RunDetails { // TODO: The following fields are the same as v0.2: + // + // REQUIRED for SLSA Build L1 unless the id is implicit from the attestation + // envelope (e.g. public key). Builder builder = 1; // TODO: description + // OPTIONAL BuildMetadata metadata = 2; // Additional artifacts generated during the build that should not be @@ -232,6 +205,8 @@ message RunDetails { // // TODO: Do we need some recommendation for how to distinguish between // byproducts? For example, should we recommend using `localName`? + // + // OPTIONAL repeated ArtifactReference byproducts = 3; } @@ -242,21 +217,33 @@ message Builder { // Can we rescope this to avoid the duplication and thus the security concern? // For example, if the envelope identifies the build system, this might // identify the tenant project? + // + // REQUIRED for SLSA Build L1 unless the id is implicit from the attestation + // envelope (e.g. public key). string id = 1; // TODO: Do we want to add this field? (#319) + // + // OPTIONAL string version = 2; // Dependencies used by the orchestrator that are not run within the workload // and that should not affect the build, but may affect the provenance // generation or security guarantees. // TODO: Flesh out this model more. 
+ // + // OPTIONAL repeated ArtifactReference builderDependencies = 3; } message BuildMetadata { // TODO: same as v0.2: + // OPTIONAL string invocationId = 1; + + // OPTIONAL google.protobuf.Timestamp startedOn = 2; + + // OPTIONAL google.protobuf.Timestamp finishedOn = 3; } diff --git a/docs/provenance/v1_model.svg b/docs/provenance/v1_model.svg new file mode 100644 index 000000000..4bc5f5fa7 --- /dev/null +++ b/docs/provenance/v1_model.svg @@ -0,0 +1 @@ + \ No newline at end of file From d044c3dce2f815ee7a34f07653aa2041695d2b71 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Mon, 14 Nov 2022 20:45:26 -0500 Subject: [PATCH 25/44] WIP: fix typo in URL Signed-off-by: Mark Lodato --- docs/provenance/v1.0.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/provenance/v1.0.md b/docs/provenance/v1.0.md index f1b92efcb..0e4f1cd6d 100644 --- a/docs/provenance/v1.0.md +++ b/docs/provenance/v1.0.md @@ -32,7 +32,7 @@ The model is as follows: - The build runs on a multi-tenant platform, where each execution is independent. The `builder` is the identity of this platform, representing the transitive closure of all entities that must be - [trusted](../spec/v1.0/priciples.md#trust-systems-verify-artifacts) to + [trusted](../spec/v1.0/principles.md#trust-systems-verify-artifacts) to faithfully run the build and record the provenance. (Note: The same model can be used for platform-less or single-tenant build systems.) 
From 3ae85a9fd599efb85cc36dee3c155dddc398fc8e Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Mon, 14 Nov 2022 21:24:50 -0500 Subject: [PATCH 26/44] WIP: add todo Signed-off-by: Mark Lodato --- docs/provenance/v1.0.proto | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/provenance/v1.0.proto b/docs/provenance/v1.0.proto index 4567919fb..3979b60bb 100644 --- a/docs/provenance/v1.0.proto +++ b/docs/provenance/v1.0.proto @@ -162,6 +162,7 @@ message ArtifactReference { string uri = 1; // [DigestSet] of cryptographic digests for the contents of this artifact. + // TODO: Decide on hex vs base64 in #533 then document it here. map<string, string> digest = 2; // The name for this artifact local to the build. From e60f7426f501f34e8b41e9b4da594bb7ec1c57bb Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Tue, 15 Nov 2022 07:25:50 -0500 Subject: [PATCH 27/44] Use the generic SLSA generator for the example. The "go" example was too confusing because the interface isn't actually a GitHub Actions workflow. Signed-off-by: Mark Lodato --- docs/provenance/v1.0.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/provenance/v1.0.md b/docs/provenance/v1.0.md index 0e4f1cd6d..e6f8949b5 100644 --- a/docs/provenance/v1.0.md +++ b/docs/provenance/v1.0.md @@ -161,12 +161,12 @@ of the other top-level fields, such as `subject`, see [Statement]._ ### GitHub Actions -#### [SLSA GitHub Generator](https://github.com/slsa-framework/slsa-github-generator) - ```jsonc "predicate": { "buildDefinition": { - "buildType": "https://github.com/slsa-framework/slsa-github-generator/go@v1", + // TODO: Replace this with a stable URL that points to documentation, + // not necessarily tied to this "generator". + "buildType": "https://github.com/slsa-framework/slsa-github-generator/generic@v1", "externalParameters": { "artifacts": { // The repo containing the top-level workflow.
@@ -197,8 +197,12 @@ of the other top-level fields, such as `subject`, see [Statement]._ ] }, "runDetails": { + // Provenance generated by the "SLSA GitHub Generator" reusable + // workflow. If GitHub generated the provenance itself, the + // buildDefinition would likely stay the same but this builder would + // identify GitHub rather than this workflow. "builder": { - "id": "https://github.com/slsa-framework/slsa-github-generator/.github/workflows/builder_go_slsa3.yml@refs/tags/v0.0.1", + "id": "https://github.com/slsa-framework/slsa-github-generator/.github/workflows/builder_generic_slsa3.yml@refs/tags/v1.2.0", "builderDependencies": [ { "localName": "builderBinary", From 479b7b3392795d62835dfc133eab27977ffb093e Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Wed, 30 Nov 2022 08:30:00 -0500 Subject: [PATCH 28/44] Make builder.version a map Signed-off-by: Mark Lodato --- docs/provenance/v1.0.proto | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/provenance/v1.0.proto b/docs/provenance/v1.0.proto index 3979b60bb..049d466ac 100644 --- a/docs/provenance/v1.0.proto +++ b/docs/provenance/v1.0.proto @@ -224,9 +224,11 @@ message Builder { string id = 1; // TODO: Do we want to add this field? (#319) + // TODO: Should we merge this with builderDependencies into a combined + // "builderParameters"? Then arbitrary information can be stored. // // OPTIONAL - string version = 2; + map version = 2; // Dependencies used by the orchestrator that are not run within the workload // and that should not affect the build, but may affect the provenance From 9ca734683bf6163f54968c8d065d1336d2805208 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Wed, 30 Nov 2022 16:37:54 -0500 Subject: [PATCH 29/44] Replace .artifacts[name] with [name].artifact. 
Signed-off-by: Mark Lodato --- docs/provenance/v1.0.cue | 16 +++--- docs/provenance/v1.0.md | 105 +++++++++++++++++++------------------ docs/provenance/v1.0.proto | 75 +++++++++++--------------- 3 files changed, 92 insertions(+), 104 deletions(-) diff --git a/docs/provenance/v1.0.cue b/docs/provenance/v1.0.cue index 56c04978d..a5f9624ae 100644 --- a/docs/provenance/v1.0.cue +++ b/docs/provenance/v1.0.cue @@ -8,14 +8,8 @@ "predicate": { "buildDefinition": { "buildType": string, - "externalParameters": { - "artifacts": { [string]: #ArtifactReference }, - "values": { [string]: string }, - } - "systemParameters": { - "artifacts": { [string]: #ArtifactReference }, - "values": { [string]: string }, - } + "externalParameters": { [string]: #ParameterValue }, + "systemParameters": { [string]: #ParameterValue }, "resolvedDependencies": [ ...#ArtifactReference ], }, "runDetails": { @@ -34,6 +28,12 @@ } } +#ParameterValue: { + "artifact": #ArtifactReference +} | { + "value": string +} + #ArtifactReference: { "uri": string, "digest": { diff --git a/docs/provenance/v1.0.md b/docs/provenance/v1.0.md index e6f8949b5..397f84bee 100644 --- a/docs/provenance/v1.0.md +++ b/docs/provenance/v1.0.md @@ -125,20 +125,23 @@ of the other top-level fields, such as `subject`, see [Statement]._ "buildDefinition": { "buildType": "https://slsa.dev/container-based-build/v0.1?draft", "externalParameters": { - "artifacts": { - // The thing to be built. - "source": { + // The thing to be built. + "source": { + "artifact": { "uri": "git+https://github.com/bcoe/slsa-on-github-test@refs/heads/main", "digest": { "sha1": "deadbeef" } - }, - // The container image in which to build it. - "buildImage": { + } + }, + // The container image in which to build it. + "buildImage": { + "artifact": { "uri": "pkg:oci/builder-image?repository_url=gcr.io", "digest": { "sha256": "53ca44..." } }, - } - "values": { - "configFile": "path/to/config.file" + }, + // The path to the config file within "source". 
+ "configFile": { + "value": "path/to/config.file" } } "systemParameters": null, @@ -168,27 +171,25 @@ of the other top-level fields, such as `subject`, see [Statement]._ // not necessarily tied to this "generator". "buildType": "https://github.com/slsa-framework/slsa-github-generator/generic@v1", "externalParameters": { - "artifacts": { - // The repo containing the top-level workflow. - "source": { + // The repo containing the top-level workflow. + "source": { + "artifact": { "uri": "git+https://github.com/laurentsimon/slsa-verifier-test-gen@refs/heads/main", "digest": { "sha1": "15bf79ea9c89fffbf5dd02c6b5b686b291bfcbd2" } } }, - "values": { - "workflow": ".github/workflow/release.yml", - // For workflow_dispatch events, the `input` field if present. - "input": null - } + "workflow": { + "value": ".github/workflow/release.yml" + }, + // For workflow_dispatch events, the `input` field if present. + "input": null }, "systemParameters": { - "values": { - "github_actor": "...", - "github_event_name": "workflow_dispatch", - // ... and so on ... - } + "github_actor": { "value": "..." }, + "github_event_name": { "value": "workflow_dispatch" }, + // ... and so on ... }, "resolvedDependencies": [ { @@ -241,14 +242,14 @@ The GitLab CI team has implemented an [artifact attestation](https://docs.gitlab "buildDefinition": { "buildType": "https://tekton.dev/tekton-task/v0.1?draft", "externalParameters": { - "artifacts": { - "source": { + "source": { + "artifact": { "uri": "git+https://github.com/tektoncd/catalog.git", "digest": { "sha1": "962f9005350b1acb359558b6c5aafc420e0beb46" } } }, - "values": { - "workflow": "path/to/sample_taskrun.yaml", + "workflow": { + "value": "path/to/sample_taskrun.yaml" } }, "systemParameters": null @@ -284,29 +285,31 @@ meaning of each field is unchanged unless otherwise noted. // The `buildType` MUST be updated for v1.0 to describe how to // interpret `inputArtifacts`. 
"buildType": /* updated version of */ old.buildType, - "externalParameters": { - "artifacts": { - // OPTION 1: - // If the old `configSource` was the sole top-level input, - // (i.e. containing the source or a pointer to the source): - "source": { + "externalParameters": old.invocation.parameters + { + // It is RECOMMENDED to rename "entryPoint" to something more + // descriptive. + "entryPoint": old.invocation.configSource.entryPoint, + // OPTION 1: + // If the old `configSource` was the sole top-level input, + // (i.e. containing the source or a pointer to the source): + "source": { + "artifact": { "uri": old.invocation.configSource.uri, "digest": old.invocation.configSource.digest, }, - // OPTION 2: - // If the old `configSource` contained just build configuration - // and a separate top-level input contained the source: - "source": old.materials[indexOfSource], - "config": { + }, + // OPTION 2: + // If the old `configSource` contained just build configuration + // and a separate top-level input contained the source: + "source": { + "artifact": old.materials[indexOfSource], + }, + "config": { + "artifact": { "uri": old.invocation.configSource.uri, "digest": old.invocation.configSource.digest, }, }, - "values": old.invocation.parameters + { - // It is RECOMMENDED to rename "entryPoint" to something more - // descriptive. - "entryPoint": old.invocation.configSource.entryPoint, - }, }, "systemParameters": { "artifacts": null, // not in v0.2 @@ -332,10 +335,10 @@ meaning of each field is unchanged unless otherwise noted. The following fields from v0.2 are no longer present in v1.0: -- `entryPoint`: Use `externalParameters.values[]` instead. +- `entryPoint`: Use `externalParameters[]` instead. - `buildConfig`: No longer inlined into the provenance. Instead, either: - If the configuration is a top-level input, record its digest in - `externalParameters.artifacts["config"]`. + `externalParameters["config"]`. 
- Else if there is a known use case for knowing the exact resolved build configuration, record its digest in `byproducts`. An example use case might be someone who wishes to parse the configuration to look for @@ -354,14 +357,12 @@ Major refactor to reduce misinterpretation, including a minor change in model. - Altered the model slightly to better align with real-world build systems, align with reproducible builds, and make verification easier. - Grouped fields into `buildDefinition` vs `runDetails`. -- Further grouped fields into `externalParameters` vs `systemParameters`: - - Renamed `parameters` to `externalParameters.values`. - - Renamed `environment` to `systemParameters.values`. - - Removed `entryPoint`; now one of `externalParameters.values`. - - Removed `configSource`; now one of `externalParameters.artifacts`. -- Replaced `materials` with more specific artifact-reference fields: - `resolvedDependencies`, `externalParameters.artifacts`, - `systemParameters.artifacts`, and `builderDependencies`. +- Renamed `parameters` and `environment` to `externalParameters` and + `systemParameters`, respectively. Both can now reference artifacts or string + values. +- Split and merged `configSource` into `externalParameters`. +- Split and merged `materials` into `resolvedDependencies`, + `externalParameters`, `systemParameters`, and `builderDependencies`. - Added `localName`, `downloadLocation`, and `mediaType` to artifact references. - Removed `buildConfig`; can be replaced with diff --git a/docs/provenance/v1.0.proto b/docs/provenance/v1.0.proto index 049d466ac..8b92c6dfb 100644 --- a/docs/provenance/v1.0.proto +++ b/docs/provenance/v1.0.proto @@ -57,6 +57,15 @@ message BuildDefinition { // the information necessary and sufficient to initialize the build and begin // execution. "Top-level" means that it is not derived from another input. // + // The key is a name whose interpretation depends on `buildType`. 
It MUST be + // unique across `externalParameters` and `systemParameters`. The following + // conventional names are RECOMMENDED when appropriate: + // + // name | description + // -------- | ----------- + // `source` | The primary input to the build. + // `config` | The build configuration, if different from `source`. + // // The build system SHOULD be designed to minimize the amount of information // necessary here, in order to reduce fragility and ease verification. // Consumers SHOULD have an expectation of what "good" looks like; the more @@ -77,10 +86,22 @@ message BuildDefinition { // sole top-level input, in order to guarantee that the information is // sufficient to run the build. // + // - In some cases, the build configuration is evaluated client-side and + // sent over the wire, such that the build system cannot determine its + // origin. In those cases, the build system SHOULD serialize the + // configuration in a deterministic way and record the `digest` without a + // `uri`. This allows one to consider the client-side evaluation as a + // separate "build" with its own provenance, such that the verifier can + // chain the two provenance attestations together to determine the origin + // of the configuration. + // // TODO: Describe how complete this must be at each SLSA level. // + // TODO: Some requirement that the builder verifies the URI and that the + // verifier checks it against expectations? + // // REQUIRED for SLSA Build L1. - ParameterCollection externalParameters = 2; + map externalParameters = 2; // Parameters of the build environment that were provided by the `builder` and // not under external control. The primary intention of this field is for @@ -88,7 +109,7 @@ message BuildDefinition { // MAY be necessary for reproducing the build. // // OPTIONAL. - ParameterCollection systemParameters = 3; + map systemParameters = 3; // Resolved dependencies needed at build time. 
For example, if the build // script fetches and executes "example.com/foo.sh", which in turn fetches @@ -109,48 +130,14 @@ message BuildDefinition { repeated ArtifactReference resolvedDependencies = 1; } - -message ParameterCollection { - // References to the top-level, independent input artifacts to the build. In - // many cases, this is a singular "source" artifact to be built. - // - // The key is a name whose interpretation depends on `buildType`. If there is - // only one input, it SHOULD be named "source". The following conventional - // names are RECOMMENDED when appropriate: - // - // name | description - // --------------- | ----------- - // `source` | The primary input to the build. - // `config` | The build configuration, if different from `source`. - // `builderBinary` | The top-level binary that runs the build. - // `buildImage` | The container or VM image in which the build occurred. - // - // IMPORTANT: Each name MUST be unique across `externalParameters`, - // `systemParameters`, `artifacts`, and `values`. - // - // In some cases, the build configuration is evaluated client-side and sent - // over the wire, such that the build system cannot determine its origin. In - // those cases, the build system SHOULD serialize the configuration in a - // deterministic way and record the `digest` without a `uri`. This allows one - // to consider the client-side evaluation as a separate "build" with its own - // provenance, such that the verifier can chain the two provenance - // attestations together to determine the origin of the configuration. - // - // TODO: Some requirement that the builder verifies the URI and that the - // verifier checks it against expectations? - // TODO: How can we best document this across externalParameters and - // systemParameters? - map artifacts = 1; - - // Other parameters that are not artifact references. Like `artifacts`, the - // key is a name whose interpretation depends on `buildType`. 
- // - // IMPORTANT: Each name MUST be unique across `externalParameters`, - // `systemParameters`, `artifacts`, and `values`. - // - // For simplicity, only string values are supported. If a non-string value is - // used, serialize it in a deterministic way to a string. - map values = 2; +message ParameterValue { + oneof value { + // A parameter that is a reference to an artifact. + ArtifactReference artifact = 1; + // A parameter that is a scalar value. For simplicity, only string values + // are supported. + string value = 2; + } } message ArtifactReference { From d63f6badf67d23c6cb99f4a3a2c0c5d94af44013 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Wed, 30 Nov 2022 17:04:39 -0500 Subject: [PATCH 30/44] Replace map with array of name/value pairs Signed-off-by: Mark Lodato --- docs/provenance/v1.0.cue | 12 +++++---- docs/provenance/v1.0.md | 52 ++++++++++++++++++++++---------------- docs/provenance/v1.0.proto | 49 +++++++++++++++++++++-------------- 3 files changed, 67 insertions(+), 46 deletions(-) diff --git a/docs/provenance/v1.0.cue b/docs/provenance/v1.0.cue index a5f9624ae..6a289f87e 100644 --- a/docs/provenance/v1.0.cue +++ b/docs/provenance/v1.0.cue @@ -8,8 +8,8 @@ "predicate": { "buildDefinition": { "buildType": string, - "externalParameters": { [string]: #ParameterValue }, - "systemParameters": { [string]: #ParameterValue }, + "externalParameters": [ ...#Parameter ], + "systemParameters": [ ...#Parameter ], "resolvedDependencies": [ ...#ArtifactReference ], }, "runDetails": { @@ -28,10 +28,12 @@ } } -#ParameterValue: { - "artifact": #ArtifactReference +#Parameter: { + "name": string, + "artifact": #ArtifactReference, } | { - "value": string + "name": string, + "value": string, } #ArtifactReference: { diff --git a/docs/provenance/v1.0.md b/docs/provenance/v1.0.md index 397f84bee..3ae08c446 100644 --- a/docs/provenance/v1.0.md +++ b/docs/provenance/v1.0.md @@ -124,26 +124,29 @@ of the other top-level fields, such as `subject`, see [Statement]._ 
"predicate": { "buildDefinition": { "buildType": "https://slsa.dev/container-based-build/v0.1?draft", - "externalParameters": { + "externalParameters": [ // The thing to be built. - "source": { + { + "name": "source", "artifact": { "uri": "git+https://github.com/bcoe/slsa-on-github-test@refs/heads/main", "digest": { "sha1": "deadbeef" } } }, // The container image in which to build it. - "buildImage": { + { + "name": "buildImage", "artifact": { "uri": "pkg:oci/builder-image?repository_url=gcr.io", "digest": { "sha256": "53ca44..." } }, }, // The path to the config file within "source". - "configFile": { + { + "name": "configFile", "value": "path/to/config.file" } - } + ], "systemParameters": null, "resolvedDependencies": null, }, @@ -241,17 +244,19 @@ The GitLab CI team has implemented an [artifact attestation](https://docs.gitlab "predicate": { "buildDefinition": { "buildType": "https://tekton.dev/tekton-task/v0.1?draft", - "externalParameters": { - "source": { + "externalParameters": [ + { + "name": "source", "artifact": { "uri": "git+https://github.com/tektoncd/catalog.git", "digest": { "sha1": "962f9005350b1acb359558b6c5aafc420e0beb46" } } }, - "workflow": { + { + "name": "workflow", "value": "path/to/sample_taskrun.yaml" } - }, + ], "systemParameters": null "resolvedDependencies": [ { @@ -285,14 +290,18 @@ meaning of each field is unchanged unless otherwise noted. // The `buildType` MUST be updated for v1.0 to describe how to // interpret `inputArtifacts`. "buildType": /* updated version of */ old.buildType, - "externalParameters": old.invocation.parameters + { + "externalParameters": toArray(old.invocation.parameters) + { // It is RECOMMENDED to rename "entryPoint" to something more // descriptive. - "entryPoint": old.invocation.configSource.entryPoint, + { + "name": "entryPoint", + "value": old.invocation.configSource.entryPoint, + }, // OPTION 1: // If the old `configSource` was the sole top-level input, // (i.e. 
containing the source or a pointer to the source): - "source": { + { + "name": "source", "artifact": { "uri": old.invocation.configSource.uri, "digest": old.invocation.configSource.digest, @@ -301,20 +310,19 @@ meaning of each field is unchanged unless otherwise noted. // OPTION 2: // If the old `configSource` contained just build configuration // and a separate top-level input contained the source: - "source": { + { + "name": "source", "artifact": old.materials[indexOfSource], }, - "config": { + { + "name": "config", "artifact": { "uri": old.invocation.configSource.uri, "digest": old.invocation.configSource.digest, }, }, }, - "systemParameters": { - "artifacts": null, // not in v0.2 - "values": old.invocation.environment, - }, + "systemParameters": toArray(old.invocation.environment), "resolvedDependencies": old.materials, }, "runDetails": { @@ -335,10 +343,10 @@ meaning of each field is unchanged unless otherwise noted. The following fields from v0.2 are no longer present in v1.0: -- `entryPoint`: Use `externalParameters[]` instead. +- `entryPoint`: Use `externalParameters` instead. - `buildConfig`: No longer inlined into the provenance. Instead, either: - If the configuration is a top-level input, record its digest in - `externalParameters["config"]`. + `externalParameters`. - Else if there is a known use case for knowing the exact resolved build configuration, record its digest in `byproducts`. An example use case might be someone who wishes to parse the configuration to look for @@ -365,8 +373,8 @@ Major refactor to reduce misinterpretation, including a minor change in model. `externalParameters`, `systemParameters`, and `builderDependencies`. - Added `localName`, `downloadLocation`, and `mediaType` to artifact references. -- Removed `buildConfig`; can be replaced with - `externalParameters.artifacts["config"]`, `byproducts`, or simply omitted. +- Removed `buildConfig`; can be replaced with `externalParameters`, + `byproducts`, or simply omitted. 
- Removed `completeness` and `reproducible`; now implied by `builder.id`. - Added `builder.version`. - Added `byproducts`. diff --git a/docs/provenance/v1.0.proto b/docs/provenance/v1.0.proto index 8b92c6dfb..bb565c891 100644 --- a/docs/provenance/v1.0.proto +++ b/docs/provenance/v1.0.proto @@ -57,14 +57,7 @@ message BuildDefinition { // the information necessary and sufficient to initialize the build and begin // execution. "Top-level" means that it is not derived from another input. // - // The key is a name whose interpretation depends on `buildType`. It MUST be - // unique across `externalParameters` and `systemParameters`. The following - // conventional names are RECOMMENDED when appropriate: - // - // name | description - // -------- | ----------- - // `source` | The primary input to the build. - // `config` | The build configuration, if different from `source`. + // The interpretation of each parameter is determined by `buildType`. // // The build system SHOULD be designed to minimize the amount of information // necessary here, in order to reduce fragility and ease verification. @@ -95,21 +88,27 @@ message BuildDefinition { // chain the two provenance attestations together to determine the origin // of the configuration. // + // SHOULD be sorted by `name` to make the provenance deterministic. + // // TODO: Describe how complete this must be at each SLSA level. // // TODO: Some requirement that the builder verifies the URI and that the // verifier checks it against expectations? // // REQUIRED for SLSA Build L1. - map externalParameters = 2; + repeated Parameter externalParameters = 2; // Parameters of the build environment that were provided by the `builder` and // not under external control. The primary intention of this field is for // debugging, incident response, and vulnerability management. The values here // MAY be necessary for reproducing the build. // + // The interpretation of each parameter is determined by `buildType`. 
+ // + // SHOULD be sorted by `name` to make the provenance deterministic. + // // OPTIONAL. - map systemParameters = 3; + repeated Parameter systemParameters = 3; // Resolved dependencies needed at build time. For example, if the build // script fetches and executes "example.com/foo.sh", which in turn fetches @@ -123,20 +122,32 @@ message BuildDefinition { // TODO: Explain how to determine what goes here. // TODO: Explain that it's OK for it to be incomplete. // TODO: If the dep is already pinned, does it need to be listed here? - // TODO: Should this be a map instead of an array? Then each MUST be named - // explicitly, which would be less ambiguous but more difficult. + // TODO: Should this be a Parameter instead of ArtifactReference? On the one + // hand, that would allow specification on scalar values. On the other, + // it would require each to be explicitly named (could be good or bad). // // OPTIONAL. repeated ArtifactReference resolvedDependencies = 1; } -message ParameterValue { - oneof value { - // A parameter that is a reference to an artifact. - ArtifactReference artifact = 1; - // A parameter that is a scalar value. For simplicity, only string values - // are supported. - string value = 2; +message Parameter { + // The name of this parameter. Must be unique across `externalParameters` and + // `systemParameters`. + // + // The following conventional names are RECOMMENDED when appropriate: + // + // name | description + // -------- | ----------- + // `source` | The primary input to the build. + // `config` | The build configuration, if different from `source`. + // + string name = 1; + + oneof value_type { + // A parameter value that is a reference to an artifact. + ArtifactReference artifact = 2; + // A scalar parameter value. For simplicity, only string type is supported. 
+ string value = 3; } } From 03a066000033fad54af5ce5d88f8aa672b130237 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Thu, 8 Dec 2022 14:05:16 -0500 Subject: [PATCH 31/44] Revert "Replace map with array of name/value pairs" This reverts commit d63f6badf67d23c6cb99f4a3a2c0c5d94af44013. --- docs/provenance/v1.0.cue | 12 ++++----- docs/provenance/v1.0.md | 52 ++++++++++++++++---------------------- docs/provenance/v1.0.proto | 49 ++++++++++++++--------------------- 3 files changed, 46 insertions(+), 67 deletions(-) diff --git a/docs/provenance/v1.0.cue b/docs/provenance/v1.0.cue index 6a289f87e..a5f9624ae 100644 --- a/docs/provenance/v1.0.cue +++ b/docs/provenance/v1.0.cue @@ -8,8 +8,8 @@ "predicate": { "buildDefinition": { "buildType": string, - "externalParameters": [ ...#Parameter ], - "systemParameters": [ ...#Parameter ], + "externalParameters": { [string]: #ParameterValue }, + "systemParameters": { [string]: #ParameterValue }, "resolvedDependencies": [ ...#ArtifactReference ], }, "runDetails": { @@ -28,12 +28,10 @@ } } -#Parameter: { - "name": string, - "artifact": #ArtifactReference, +#ParameterValue: { + "artifact": #ArtifactReference } | { - "name": string, - "value": string, + "value": string } #ArtifactReference: { diff --git a/docs/provenance/v1.0.md b/docs/provenance/v1.0.md index 3ae08c446..397f84bee 100644 --- a/docs/provenance/v1.0.md +++ b/docs/provenance/v1.0.md @@ -124,29 +124,26 @@ of the other top-level fields, such as `subject`, see [Statement]._ "predicate": { "buildDefinition": { "buildType": "https://slsa.dev/container-based-build/v0.1?draft", - "externalParameters": [ + "externalParameters": { // The thing to be built. - { - "name": "source", + "source": { "artifact": { "uri": "git+https://github.com/bcoe/slsa-on-github-test@refs/heads/main", "digest": { "sha1": "deadbeef" } } }, // The container image in which to build it. 
- { - "name": "buildImage", + "buildImage": { "artifact": { "uri": "pkg:oci/builder-image?repository_url=gcr.io", "digest": { "sha256": "53ca44..." } }, }, // The path to the config file within "source". - { - "name": "configFile", + "configFile": { "value": "path/to/config.file" } - ], + } "systemParameters": null, "resolvedDependencies": null, }, @@ -244,19 +241,17 @@ The GitLab CI team has implemented an [artifact attestation](https://docs.gitlab "predicate": { "buildDefinition": { "buildType": "https://tekton.dev/tekton-task/v0.1?draft", - "externalParameters": [ - { - "name": "source", + "externalParameters": { + "source": { "artifact": { "uri": "git+https://github.com/tektoncd/catalog.git", "digest": { "sha1": "962f9005350b1acb359558b6c5aafc420e0beb46" } } }, - { - "name": "workflow", + "workflow": { "value": "path/to/sample_taskrun.yaml" } - ], + }, "systemParameters": null "resolvedDependencies": [ { @@ -290,18 +285,14 @@ meaning of each field is unchanged unless otherwise noted. // The `buildType` MUST be updated for v1.0 to describe how to // interpret `inputArtifacts`. "buildType": /* updated version of */ old.buildType, - "externalParameters": toArray(old.invocation.parameters) + { + "externalParameters": old.invocation.parameters + { // It is RECOMMENDED to rename "entryPoint" to something more // descriptive. - { - "name": "entryPoint", - "value": old.invocation.configSource.entryPoint, - }, + "entryPoint": old.invocation.configSource.entryPoint, // OPTION 1: // If the old `configSource` was the sole top-level input, // (i.e. containing the source or a pointer to the source): - { - "name": "source", + "source": { "artifact": { "uri": old.invocation.configSource.uri, "digest": old.invocation.configSource.digest, @@ -310,19 +301,20 @@ meaning of each field is unchanged unless otherwise noted. 
// OPTION 2: // If the old `configSource` contained just build configuration // and a separate top-level input contained the source: - { - "name": "source", + "source": { "artifact": old.materials[indexOfSource], }, - { - "name": "config", + "config": { "artifact": { "uri": old.invocation.configSource.uri, "digest": old.invocation.configSource.digest, }, }, }, - "systemParameters": toArray(old.invocation.environment), + "systemParameters": { + "artifacts": null, // not in v0.2 + "values": old.invocation.environment, + }, "resolvedDependencies": old.materials, }, "runDetails": { @@ -343,10 +335,10 @@ meaning of each field is unchanged unless otherwise noted. The following fields from v0.2 are no longer present in v1.0: -- `entryPoint`: Use `externalParameters` instead. +- `entryPoint`: Use `externalParameters[]` instead. - `buildConfig`: No longer inlined into the provenance. Instead, either: - If the configuration is a top-level input, record its digest in - `externalParameters`. + `externalParameters["config"]`. - Else if there is a known use case for knowing the exact resolved build configuration, record its digest in `byproducts`. An example use case might be someone who wishes to parse the configuration to look for @@ -373,8 +365,8 @@ Major refactor to reduce misinterpretation, including a minor change in model. `externalParameters`, `systemParameters`, and `builderDependencies`. - Added `localName`, `downloadLocation`, and `mediaType` to artifact references. -- Removed `buildConfig`; can be replaced with `externalParameters`, - `byproducts`, or simply omitted. +- Removed `buildConfig`; can be replaced with + `externalParameters.artifacts["config"]`, `byproducts`, or simply omitted. - Removed `completeness` and `reproducible`; now implied by `builder.id`. - Added `builder.version`. - Added `byproducts`. 
diff --git a/docs/provenance/v1.0.proto b/docs/provenance/v1.0.proto index bb565c891..8b92c6dfb 100644 --- a/docs/provenance/v1.0.proto +++ b/docs/provenance/v1.0.proto @@ -57,7 +57,14 @@ message BuildDefinition { // the information necessary and sufficient to initialize the build and begin // execution. "Top-level" means that it is not derived from another input. // - // The interpretation of each parameter is determined by `buildType`. + // The key is a name whose interpretation depends on `buildType`. It MUST be + // unique across `externalParameters` and `systemParameters`. The following + // conventional names are RECOMMENDED when appropriate: + // + // name | description + // -------- | ----------- + // `source` | The primary input to the build. + // `config` | The build configuration, if different from `source`. // // The build system SHOULD be designed to minimize the amount of information // necessary here, in order to reduce fragility and ease verification. @@ -88,27 +95,21 @@ message BuildDefinition { // chain the two provenance attestations together to determine the origin // of the configuration. // - // SHOULD be sorted by `name` to make the provenance deterministic. - // // TODO: Describe how complete this must be at each SLSA level. // // TODO: Some requirement that the builder verifies the URI and that the // verifier checks it against expectations? // // REQUIRED for SLSA Build L1. - repeated Parameter externalParameters = 2; + map externalParameters = 2; // Parameters of the build environment that were provided by the `builder` and // not under external control. The primary intention of this field is for // debugging, incident response, and vulnerability management. The values here // MAY be necessary for reproducing the build. // - // The interpretation of each parameter is determined by `buildType`. - // - // SHOULD be sorted by `name` to make the provenance deterministic. - // // OPTIONAL. 
- repeated Parameter systemParameters = 3; + map systemParameters = 3; // Resolved dependencies needed at build time. For example, if the build // script fetches and executes "example.com/foo.sh", which in turn fetches @@ -122,32 +123,20 @@ message BuildDefinition { // TODO: Explain how to determine what goes here. // TODO: Explain that it's OK for it to be incomplete. // TODO: If the dep is already pinned, does it need to be listed here? - // TODO: Should this be a Parameter instead of ArtifactReference? On the one - // hand, that would allow specification on scalar values. On the other, - // it would require each to be explicitly named (could be good or bad). + // TODO: Should this be a map instead of an array? Then each MUST be named + // explicitly, which would be less ambiguous but more difficult. // // OPTIONAL. repeated ArtifactReference resolvedDependencies = 1; } -message Parameter { - // The name of this parameter. Must be unique across `externalParameters` and - // `systemParameters`. - // - // The following conventional names are RECOMMENDED when appropriate: - // - // name | description - // -------- | ----------- - // `source` | The primary input to the build. - // `config` | The build configuration, if different from `source`. - // - string name = 1; - - oneof value_type { - // A parameter value that is a reference to an artifact. - ArtifactReference artifact = 2; - // A scalar parameter value. For simplicity, only string type is supported. - string value = 3; +message ParameterValue { + oneof value { + // A parameter that is a reference to an artifact. + ArtifactReference artifact = 1; + // A parameter that is a scalar value. For simplicity, only string values + // are supported. 
+ string value = 2; } } From 0ca0d69ac72a8a6548e1e4f0d310ff5d187ebe14 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Wed, 4 Jan 2023 15:26:12 -0500 Subject: [PATCH 32/44] Update provenance build model Signed-off-by: Mark Lodato --- docs/provenance/v1.0.md | 56 ++++++++++++++++++++---------------- docs/provenance/v1_model.svg | 2 +- 2 files changed, 33 insertions(+), 25 deletions(-) diff --git a/docs/provenance/v1.0.md b/docs/provenance/v1.0.md index 397f84bee..28985c06a 100644 --- a/docs/provenance/v1.0.md +++ b/docs/provenance/v1.0.md @@ -29,34 +29,42 @@ artifacts through execution of the `buildDefinition`. The model is as follows: -- The build runs on a multi-tenant platform, where each execution is - independent. The `builder` is the identity of this platform, representing - the transitive closure of all entities that must be +- Each build runs as an independent process on a multi-tenant platform. The + `builder` is the identity of this platform, representing the transitive + closure of all entities that must be [trusted](../spec/v1.0/principles.md#trust-systems-verify-artifacts) to faithfully run the build and record the provenance. (Note: The same model can be used for platform-less or single-tenant build systems.) -- The build process is defined by a parameterized template, identified by - `buildType`. Often a build platform only supports a single template, e.g. - the GitHub Actions platform only supports executing a GitHub Actions - workflow file. - -- The external interface to the build is through `externalParameters`, which - captures the set of top-level (i.e. independent) inputs to the build. Some - of these parameters are references to artifacts while others are pure - values. For example, for GitHub Actions, this would be the source repository - (artifact reference) and path to the workflow file (value). - -- The build runs inside an environment initialized by the platform. The - `systemParameters` capture the top-level (i.e. 
independent) parameters to - this environment. These parameters are set internally by the platform, - though they may refer to external artifacts. (The build platform may also - communicate with the build environment through some control plane, but this - is not captured in the provenance.) - -- Dependent artifacts may be fetched during initialization or execution of the - build process. The `resolvedDependencies` captures these dependencies, if - known. +- The build process's initial state is defined by a parameterized template. + The `buildType` is the identity of this template. Often a build platform + only supports a single template, e.g. the GitHub Actions platform only + supports executing a GitHub Actions workflow file. + +- The parameters define the set of top-level (i.e. independent) inputs to the + build: + + - `externalParameters` are the external interface to the build. In SLSA, + these values are untrusted; they MUST be included in the provenance and + MUST be verified downstream. + + - `systemParameters` are set internally by the platform. In SLSA, these + values are trusted; they are OPTIONAL and need not be verified + downstream. They MAY be included to enable reproducible builds, + debugging, or incident response. + + Parameters are either artifact references or pure values. For example, the + external parameters for a GitHub Actions workflow includes the source + repository (artifact reference) and the path to the workflow file (value). + +- Dependencies are any other artifacts fetched during initialization or + execution of the build process. The `resolvedDependencies` captures these + dependencies, if known. + +- During execution, the build process MAY communicate with the build + platform's control plane and/or build caches. This communication is not + captured in the provenance but is subject to [SLSA + Requirements](../spec/v1.0/requirements.md). - Finally, the build process outputs one or more artifacts, identified by `subject`. 
diff --git a/docs/provenance/v1_model.svg b/docs/provenance/v1_model.svg index 4bc5f5fa7..1d8d7befa 100644 --- a/docs/provenance/v1_model.svg +++ b/docs/provenance/v1_model.svg @@ -1 +1 @@ - \ No newline at end of file + \ No newline at end of file From 622c0b54774a9f1793275fa644ca58589a90c272 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Wed, 4 Jan 2023 16:34:15 -0500 Subject: [PATCH 33/44] Disable lint for blank lines betwen blockqutoes We often use multiple adjacent blockquotes for TODOs or warnings. This is perfectly valid Markdown. The lint marks it as a possible mistake, but as long as the reviewer verifies that it renders correctly, this gets in the way more than it helps. Signed-off-by: Mark Lodato --- .markdownlint.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.markdownlint.yaml b/.markdownlint.yaml index dcff86bb8..93068bb56 100644 --- a/.markdownlint.yaml +++ b/.markdownlint.yaml @@ -14,6 +14,9 @@ MD025: # Disable checking of YAML frontmatter. front_matter_title: "" +# MD028/no-blanks-blockquote - Blank line inside blockquote +MD028: false + # MD029/ol-prefix - Ordered list item prefix MD029: # List style From 77d58142b5f0e624ca6df05dc0615420a6521cf2 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Wed, 4 Jan 2023 16:46:39 -0500 Subject: [PATCH 34/44] proto nits: consistent required/optional syntax Signed-off-by: Mark Lodato --- docs/provenance/v1.0.proto | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/provenance/v1.0.proto b/docs/provenance/v1.0.proto index 8b92c6dfb..f56d473ff 100644 --- a/docs/provenance/v1.0.proto +++ b/docs/provenance/v1.0.proto @@ -24,12 +24,12 @@ message Provenance { // The accuracy and completeness of this information is implied by // `runDetails.builder.id`. // - // REQUIRED for SLSA Build L1. + // REQUIRED at SLSA Build L1. BuildDefinition buildDefinition = 1; // Details specific to this particular execution of the build. // - // REQUIRED for SLSA Build L1. 
+ // REQUIRED at SLSA Build L1. RunDetails runDetails = 2; } @@ -100,7 +100,7 @@ message BuildDefinition { // TODO: Some requirement that the builder verifies the URI and that the // verifier checks it against expectations? // - // REQUIRED for SLSA Build L1. + // REQUIRED at SLSA Build L1. map externalParameters = 2; // Parameters of the build environment that were provided by the `builder` and @@ -170,12 +170,12 @@ message ArtifactReference { message RunDetails { // TODO: The following fields are the same as v0.2: // - // REQUIRED for SLSA Build L1 unless the id is implicit from the attestation + // REQUIRED at SLSA Build L1 unless the id is implicit from the attestation // envelope (e.g. public key). Builder builder = 1; // TODO: description - // OPTIONAL + // OPTIONAL. BuildMetadata metadata = 2; // Additional artifacts generated during the build that should not be @@ -194,7 +194,7 @@ message RunDetails { // TODO: Do we need some recommendation for how to distinguish between // byproducts? For example, should we recommend using `localName`? // - // OPTIONAL + // OPTIONAL. repeated ArtifactReference byproducts = 3; } @@ -206,7 +206,7 @@ message Builder { // For example, if the envelope identifies the build system, this might // identify the tenant project? // - // REQUIRED for SLSA Build L1 unless the id is implicit from the attestation + // REQUIRED at SLSA Build L1 unless the id is implicit from the attestation // envelope (e.g. public key). string id = 1; @@ -214,7 +214,7 @@ message Builder { // TODO: Should we merge this with builderDependencies into a combined // "builderParameters"? Then arbitrary information can be stored. // - // OPTIONAL + // OPTIONAL. map version = 2; // Dependencies used by the orchestrator that are not run within the workload @@ -222,18 +222,18 @@ message Builder { // generation or security guarantees. // TODO: Flesh out this model more. // - // OPTIONAL + // OPTIONAL. 
repeated ArtifactReference builderDependencies = 3; } message BuildMetadata { // TODO: same as v0.2: - // OPTIONAL + // OPTIONAL. string invocationId = 1; - // OPTIONAL + // OPTIONAL. google.protobuf.Timestamp startedOn = 2; - // OPTIONAL + // OPTIONAL. google.protobuf.Timestamp finishedOn = 3; } From a18326b13e03696a062a70566b3dd9e200801cb5 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Thu, 5 Jan 2023 09:11:44 -0500 Subject: [PATCH 35/44] More iteration on model Signed-off-by: Mark Lodato --- docs/provenance/v1.0.md | 58 +++++++++++++++++------------------- docs/provenance/v1_model.svg | 2 +- 2 files changed, 29 insertions(+), 31 deletions(-) diff --git a/docs/provenance/v1.0.md b/docs/provenance/v1.0.md index 28985c06a..61bf40798 100644 --- a/docs/provenance/v1.0.md +++ b/docs/provenance/v1.0.md @@ -36,30 +36,31 @@ The model is as follows: faithfully run the build and record the provenance. (Note: The same model can be used for platform-less or single-tenant build systems.) -- The build process's initial state is defined by a parameterized template. - The `buildType` is the identity of this template. Often a build platform - only supports a single template, e.g. the GitHub Actions platform only - supports executing a GitHub Actions workflow file. +- The build process is defined by a parameterized template, identified by + `buildType`. Often a build platform only supports a single build type. For + example, the GitHub Actions platform only supports executing a GitHub + Actions workflow file. -- The parameters define the set of top-level (i.e. independent) inputs to the - build: +- All top-level, independent inputs are captured by the parameters to the + template. There are two types of parameters: - - `externalParameters` are the external interface to the build. In SLSA, + - `externalParameters`: the external interface to the build. In SLSA, these values are untrusted; they MUST be included in the provenance and MUST be verified downstream. 
- - `systemParameters` are set internally by the platform. In SLSA, these - values are trusted; they are OPTIONAL and need not be verified - downstream. They MAY be included to enable reproducible builds, - debugging, or incident response. + - `systemParameters`: set internally by the platform. In SLSA, these + values are trusted because the platform is trusted; they are OPTIONAL + and need not be verified downstream. They MAY be included to enable + reproducible builds, debugging, or incident response. - Parameters are either artifact references or pure values. For example, the + Some (but not all) parameters are references to artifacts. For example, the external parameters for a GitHub Actions workflow includes the source - repository (artifact reference) and the path to the workflow file (value). + repository (artifact reference) and the path to the workflow file (string + value). -- Dependencies are any other artifacts fetched during initialization or - execution of the build process. The `resolvedDependencies` captures these - dependencies, if known. +- All other artifacts fetched during initialization or execution of the build + process are considered dependencies. The `resolvedDependencies` captures + these dependencies, if known. - During execution, the build process MAY communicate with the build platform's control plane and/or build caches. This communication is not @@ -73,19 +74,7 @@ See [examples](#examples) for concrete examples. > **TODO:** Align with the [Build model](../spec/v1.0/terminology.md). -> **TODO:** Limit the size of the diagram - it's too big on desktop (but fine on -> mobile). Also reimplement it in Figma so that others can edit it. - -## Schema - -> **TODO:** Get proper syntax highlighting for cue, and explain that this is a -> cue schema. - -```javascript -{% include_relative v1.0.cue %} -``` - -### Parsing rules +## Parsing rules This predicate follows the in-toto attestation [parsing rules]. 
Summary: @@ -98,7 +87,16 @@ This predicate follows the in-toto attestation [parsing rules]. Summary: - Optional fields MAY be unset or null, and should be treated equivalently. Both are equivalent to empty for _object_ or _array_ values. -### Fields +## Schema + +> **TODO:** Get proper syntax highlighting for cue, and explain that this is a +> cue schema. + +```javascript +{% include_relative v1.0.cue %} +``` + +## Fields _NOTE: This section describes the fields within `predicate`. For a description of the other top-level fields, such as `subject`, see [Statement]._ diff --git a/docs/provenance/v1_model.svg b/docs/provenance/v1_model.svg index 1d8d7befa..3050bbae1 100644 --- a/docs/provenance/v1_model.svg +++ b/docs/provenance/v1_model.svg @@ -1 +1 @@ - \ No newline at end of file + \ No newline at end of file From 3b728b4c19504d4a5452a7ddfa92acffff2c5c43 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Fri, 6 Jan 2023 17:11:21 -0500 Subject: [PATCH 36/44] Move github actions to separate file; revise text Signed-off-by: Mark Lodato --- docs/_data/versions.yml | 7 + .../github-actions-workflow/v0.1/example.json | 44 +++++ docs/github-actions-workflow/v0.1/index.md | 106 ++++++++++ docs/provenance/v1.0.md | 183 +----------------- docs/provenance/v1.0.proto | 24 ++- 5 files changed, 188 insertions(+), 176 deletions(-) create mode 100644 docs/github-actions-workflow/v0.1/example.json create mode 100644 docs/github-actions-workflow/v0.1/index.md diff --git a/docs/_data/versions.yml b/docs/_data/versions.yml index da9c0a04f..2705321a5 100644 --- a/docs/_data/versions.yml +++ b/docs/_data/versions.yml @@ -41,3 +41,10 @@ verification_summary: v0.2: name: Version 0.2 current: v0.2 + +github-actions-workflow: + versions: + v0.1: + name: Version 0.1 (DRAFT) + draft: true + current: v0.1 diff --git a/docs/github-actions-workflow/v0.1/example.json b/docs/github-actions-workflow/v0.1/example.json new file mode 100644 index 000000000..427c3bd90 --- /dev/null +++ 
b/docs/github-actions-workflow/v0.1/example.json @@ -0,0 +1,44 @@ +{ + "predicateType": "https://slsa.dev/provenance/v1?draft", + "predicate": { + "buildDefinition": { + "buildType": "https://slsa.dev/github-actions-workflow/v0.1?draft", + "externalParameters": { + "inputs_build_id": { "value": "123456768" }, + "inputs_deploy_target": { "value": "deployment_sys_1a" }, + "inputs_perform_deploy": { "value": "true" }, + "source": { + "artifact": { + "uri": "git+https://github.com/octocat/hello-world@refs/heads/main", + "digest": { "sha1": "c27d339ee6075c1f744c5d4b200f7901aad2c369" } + } + }, + "workflow_path": { "value": ".github/workflows/release.yml" } + }, + "systemParameters": { + "github_actor": { "value": "MarkLodato" }, + "github_event_name": { "value": "workflow_dispatch" } + }, + "resolvedDependencies": [ + { + "uri": "https://github.com/actions/virtual-environments/releases/tag/ubuntu20/20220515.1" + } + ] + }, + "runDetails": { + "builder": { + "id": "https://github.com/slsa-framework/slsa-github-generator/.github/workflows/builder_go_slsa3.yml@refs/tags/v0.0.1" + }, + "metadata": { + "invocationId": "https://github.com/octocat/hello-world/actions/runs/1536140711/attempts/1", + "startedOn": "2023-01-01T12:34:56Z" + } + } + }, + "subject": [ + { + "name": "_", + "digest": { "sha256": "fe4fe40ac7250263c5dbe1cf3138912f3f416140aa248637a60d65fe22c47da4" } + } + ] +} diff --git a/docs/github-actions-workflow/v0.1/index.md b/docs/github-actions-workflow/v0.1/index.md new file mode 100644 index 000000000..4ba8daccb --- /dev/null +++ b/docs/github-actions-workflow/v0.1/index.md @@ -0,0 +1,106 @@ +--- +title: "Build Type: GitHub Actions Workflow" +layout: standard +hero_text: | + A [SLSA Provenance](../../provenance/v1.0) `buildType` that describes the + execution of a GitHub Actions workflow. +--- + +## Description + +This `buildType` describes the execution of a top-level [GitHub Actions] +workflow (as a whole). 
+ +Note: This type is not meant to describe execution of subsets of the top-level +workflow, such as an action, a job, or a reusable workflow. + +[GitHub Actions]: https://docs.github.com/en/actions + +## Build Definition + +### External parameters + +All external parameters are REQUIRED. + + +
ParameterTypeDescription + +
inputs_*string + +The [inputs context], with each `inputs.<name>` renamed to `inputs_<name>`. +Every non-empty input value MUST be recorded. Empty values SHOULD be omitted. + +Note: Only `workflow_dispatch` events and reusable workflows have inputs. + +
sourceartifact + +The git repository containing the top-level workflow YAML file. + +This can be computed from the [github context] using +`"git+" + github.server_url + "/" + github.repository + "@" + github.ref`. + +
workflow_pathstring + +The path to the workflow YAML file within `source`. + +Note: this cannot be computed directly from the [github context]: the +`github.workflow` context field only provides the *name* of the workflow, not +the path. See [getEntryPoint] for one possible implementation. + +[getEntryPoint]: https://github.com/slsa-framework/slsa-github-generator/blob/ae7e58c315b65aa92b9440d5ce25d795845b3b2a/slsa/buildtype.go#L94-L135 + +
+ +[github context]: https://docs.github.com/en/actions/learn-github-actions/contexts#github-context +[inputs context]: https://docs.github.com/en/actions/learn-github-actions/contexts#inputs-context + +### System parameters + +> TODO: None of these are really "parameters", per se, but rather metadata +> about the build. Perhaps they should go in `runDetails` instead? The problem +> is that we don't have an appropriate field for it currently. + +All system parameters are OPTIONAL. Each corresponds to the [github context] +value of the same name, with `github.<name>` renamed to `github_<name>`. The +list only includes parameters that are likely to have an effect on the build and +that are not already captured elsewhere. + +| Parameter | Type | Description | +| -------------------- | -------- | ----------- | +| `github_actor` | string | The username of the user that triggered the initial workflow run. | +| `github_event_name` | string | The name of the event that triggered the workflow run. | + +> TODO: What about `actor_id`, `repository_id`, and `repository_owner_id`? Those +> are not part of the context so they're harder to describe, and the repository +> ones should arguably go on the `source` parameter rather than be here. +> +> Also `base_ref` and `head_ref` are similar in that they are annotations about +> `source` rather than a proper parameter. + +### Resolved dependencies + +The resolved dependencies MAY contain any artifacts known to be input to the +workflow, such as the specific versions of the virtual environments used. + +## Run details + +### Metadata + +The `invocationId` SHOULD be set to `github.server_url + "/" + github.repository + +"/actions/runs/" + github.run_id + "/attempts/" + github.run_attempt`. 
+ +## Example + +```json +{% include_relative example.json %} +``` + +Note: The `builder.id` in the example assumes that the build runs under +[slsa-github-generator](https://github.com/slsa-framework/slsa-github-generator). 
+If GitHub itself generated the provenance, the `id` would be different. + +## Version history + +### v0.1 + +Initial version diff --git a/docs/provenance/v1.0.md b/docs/provenance/v1.0.md index 61bf40798..3c8b77314 100644 --- a/docs/provenance/v1.0.md +++ b/docs/provenance/v1.0.md @@ -70,7 +70,7 @@ The model is as follows: - Finally, the build process outputs one or more artifacts, identified by `subject`. -See [examples](#examples) for concrete examples. +For concrete examples, see [index of build types](#index-of-build-types). > **TODO:** Align with the [Build model](../spec/v1.0/terminology.md). @@ -96,7 +96,7 @@ This predicate follows the in-toto attestation [parsing rules]. Summary: {% include_relative v1.0.cue %} ``` -## Fields +### Fields _NOTE: This section describes the fields within `predicate`. For a description of the other top-level fields, such as `subject`, see [Statement]._ @@ -108,177 +108,12 @@ of the other top-level fields, such as `subject`, see [Statement]._ {% include_relative v1.0.proto %} ``` -## Examples +## Index of build types -> **TODO:** Move each of these definitions to a separate file that defines -> `buildType`. +The following is a partial index of build type definitions. Each contains a +complete example predicate. -> **TODO:** Use the same source repo in all examples for consistency. - -> **TODO:** Automatically verify all examples against the cue schema. - -### Container-based reproducible build - -> **WARNING:** This is not yet finalized. So far this is an idea under -> discussion to have the provenance document the input for reproducible builds, -> where the input is a container image + entry point. We will point to the -> actual documentation once the design is ready. - -> **TODO:** Move this to a separate file with a full description. 
- -```jsonc -"predicate": { - "buildDefinition": { - "buildType": "https://slsa.dev/container-based-build/v0.1?draft", - "externalParameters": { - // The thing to be built. 
- "source": { - "artifact": { - "uri": "git+https://github.com/bcoe/slsa-on-github-test@refs/heads/main", - "digest": { "sha1": "deadbeef" } - } - }, - // The container image in which to build it. - "buildImage": { - "artifact": { - "uri": "pkg:oci/builder-image?repository_url=gcr.io", - "digest": { "sha256": "53ca44..." } - }, - }, - // The path to the config file within "source". - "configFile": { - "value": "path/to/config.file" - } - } - "systemParameters": null, - "resolvedDependencies": null, - }, - "runDetails": { - "builder": { - "id": "..whoever ran the build...", - "builderDependencies": [ - { - "localName": "builderBinary", - "uri": "git+https://github.com/slsa-framework/slsa-github-generator@refs/tags/v1.2.0", - "digest": { "sha1": "bdd89e60dc5387d8f819bebc702987956bcd4913" } - } - ] - } - } -} -``` - -### GitHub Actions - -```jsonc -"predicate": { - "buildDefinition": { - // TODO: Replace this with a stable URL that points to documentation, - // not necessarily tied to this "generator". - "buildType": "https://github.com/slsa-framework/slsa-github-generator/generic@v1", - "externalParameters": { - // The repo containing the top-level workflow. - "source": { - "artifact": { - "uri": "git+https://github.com/laurentsimon/slsa-verifier-test-gen@refs/heads/main", - "digest": { - "sha1": "15bf79ea9c89fffbf5dd02c6b5b686b291bfcbd2" - } - } - }, - "workflow": { - "value": ".github/workflow/release.yml" - }, - // For workflow_dispatch events, the `input` field if present. - "input": null - }, - "systemParameters": { - "github_actor": { "value": "..." }, - "github_event_name": { "value": "workflow_dispatch" }, - // ... and so on ... - }, - "resolvedDependencies": [ - { - "uri": "https://github.com/actions/virtual-environments/releases/tag/ubuntu20/20220515.1" - } - ] - }, - "runDetails": { - // Provenance generated by the "SLSA GitHub Generator" reusable - // workflow. 
If GitHub generated the provenance itself, the - // buildDefinition would likely stay the same but this builder would - // identify GitHub rather than this workflow. - "builder": { - "id": "https://github.com/slsa-framework/slsa-github-generator/.github/workflows/builder_generic_slsa3.yml@refs/tags/v1.2.0", - "builderDependencies": [ - { - "localName": "builderBinary", - "uri": "git+https://github.com/slsa-framework/slsa-github-generator@refs/tags/v1.2.0", - "digest": { "sha1": "bdd89e60dc5387d8f819bebc702987956bcd4913" } - } - ] - }, - "metadata": { - // TODO: supply real example values here - "invocationId": "...", - "startedOn": "...", - "finishedOn": "..." - }, - "byproducts": null - } -} -``` - -### GitLab CI - -The GitLab CI team has implemented an [artifact attestation](https://docs.gitlab.com/ee/ci/runners/configure_runners.html#artifact-attestation) capability in their GitLab Runner 15.1 release. - -> **TODO** - -### Google Cloud Build - -> **TODO** - -### Tekton - -#### Tekton task run - -```jsonc -"predicate": { - "buildDefinition": { - "buildType": "https://tekton.dev/tekton-task/v0.1?draft", - "externalParameters": { - "source": { - "artifact": { - "uri": "git+https://github.com/tektoncd/catalog.git", - "digest": { "sha1": "962f9005350b1acb359558b6c5aafc420e0beb46" } - } - }, - "workflow": { - "value": "path/to/sample_taskrun.yaml" - } - }, - "systemParameters": null - "resolvedDependencies": [ - { - // The SystemConfiguration that Tekton was configured with. - "localName": "systemConfiguration", - "digest": { "sha256": "..." }, - "mediaType": "... some tekton config thing? ..." - } - ] - }, - "runDetails": { - // Set by the organization running Tekton - } -} -``` - -### Explicitly run commands - -WARNING: This is just a proof-of-concept. It is not yet standardized. - -> **TODO** +- [GitHub Actions Workflow](../../github-actions-workflow/v0.1/) ## Migrating from 0.2 @@ -388,15 +223,15 @@ Refactored to aid clarity and added `buildConfig`. 
The model is unchanged. - Added `buildConfig`, which can be used as an alternative to `configSource` to validate the configuration. -### slsa.dev/provenance +### rename: slsa.dev/provenance Renamed to "slsa.dev/provenance". -### 0.1.1 +### v0.1.1 - Added `metadata.buildInvocationId`. -### 0.1 +### v0.1 Initial version, named "in-toto.io/Provenance" diff --git a/docs/provenance/v1.0.proto b/docs/provenance/v1.0.proto index f56d473ff..7a8bafa1a 100644 --- a/docs/provenance/v1.0.proto +++ b/docs/provenance/v1.0.proto @@ -5,6 +5,12 @@ package slsa.v1; import "google/protobuf/struct.proto"; import "google/protobuf/timestamp.proto"; +// Note on REQUIRED vs OPTIONAL: an unqualified "REQUIRED" means that the field +// is mandatory to be semantically valid, while "REQUIRED at SLSA Build L1" +// means that the field is only required to be considered SLSA Build L1. In +// other words, a provenance file that omits a field marked "REQUIRED at SLSA +// Build L1" is valid but SLSA Build L0. +// // Design guidelines: // // - Use lowerCamelCase for field names because that is how the JSON fields @@ -49,6 +55,7 @@ message BuildDefinition { // - any other restrictions // - Explicit, unambiguous instructions for how to initiate the build given // this message. + // - Complete example provenance file. // // REQUIRED. string buildType = 1; @@ -125,12 +132,15 @@ message BuildDefinition { // TODO: If the dep is already pinned, does it need to be listed here? // TODO: Should this be a map instead of an array? Then each MUST be named // explicitly, which would be less ambiguous but more difficult. + // TODO: Clarify when something should go here vs builderDependencies. The + // choice is not obvious. More examples might help. // // OPTIONAL. repeated ArtifactReference resolvedDependencies = 1; } message ParameterValue { + // REQUIRED. oneof value { // A parameter that is a reference to an artifact. 
ArtifactReference artifact = 1; @@ -146,31 +156,41 @@ message ArtifactReference { // URL ([purl]). // // Example: `pkg:pypi/pyyaml@6.0` + // + // REQUIRED unless `digest` is set. string uri = 1; // [DigestSet] of cryptographic digests for the contents of this artifact. // TODO: Decide on hex vs base64 in #533 then document it here. + // + // REQUIRED unless `uri` is set. map digest = 2; // The name for this artifact local to the build. // // Example: `PyYAML-6.0.tar.gz` + // + // OPTIONAL. string localName = 3; // [URI] identifying the location that this artifact was downloaded from, if // different and not derivable from `uri`. // // Example: `https://files.pythonhosted.org/packages/36/2b/61d51a2c4f25ef062ae3f74576b01638bebad5e045f747ff12643df63844/PyYAML-6.0.tar.gz` + // + // OPTIONAL. string downloadLocation = 4; // [Media Type] (aka MIME type) of this artifact. + // + // OPTIONAL. string mediaType = 5; } message RunDetails { // TODO: The following fields are the same as v0.2: // - // REQUIRED at SLSA Build L1 unless the id is implicit from the attestation + // REQUIRED at SLSA Build L1 unless the `id` is implicit from the attestation // envelope (e.g. public key). Builder builder = 1; @@ -206,7 +226,7 @@ message Builder { // For example, if the envelope identifies the build system, this might // identify the tenant project? // - // REQUIRED at SLSA Build L1 unless the id is implicit from the attestation + // REQUIRED at SLSA Build L1 unless it is implicit from the attestation // envelope (e.g. public key). string id = 1; From a4494fb6906e5cf7521fe34881796e0c063a875d Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Fri, 6 Jan 2023 17:22:13 -0500 Subject: [PATCH 37/44] Only include major version in provenance URL. Also move to a subdirectory (1) since we now have multiple files and (2) to make relative includes work correctly whether or not the URL has a trailing slash.
Signed-off-by: Mark Lodato --- docs/_data/versions.yml | 2 +- docs/github-actions-workflow/v0.1/index.md | 2 +- docs/provenance/{v1.0.md => v1/index.md} | 10 +++++++--- docs/provenance/{v1_model.svg => v1/model.svg} | 0 docs/provenance/{v1.0.cue => v1/provenance.cue} | 0 docs/provenance/{v1.0.proto => v1/provenance.proto} | 0 docs/spec/v1.0/requirements.md | 5 +---- 7 files changed, 10 insertions(+), 9 deletions(-) rename docs/provenance/{v1.0.md => v1/index.md} (95%) rename docs/provenance/{v1_model.svg => v1/model.svg} (100%) rename docs/provenance/{v1.0.cue => v1/provenance.cue} (100%) rename docs/provenance/{v1.0.proto => v1/provenance.proto} (100%) diff --git a/docs/_data/versions.yml b/docs/_data/versions.yml index 2705321a5..233d13dc5 100644 --- a/docs/_data/versions.yml +++ b/docs/_data/versions.yml @@ -29,7 +29,7 @@ provenance: name: Version 0.1 v0.2: name: Version 0.2 - v1.0: + v1: name: Version 1.0 (DRAFT) draft: true current: v0.2 diff --git a/docs/github-actions-workflow/v0.1/index.md b/docs/github-actions-workflow/v0.1/index.md index 4ba8daccb..68fde4902 100644 --- a/docs/github-actions-workflow/v0.1/index.md +++ b/docs/github-actions-workflow/v0.1/index.md @@ -2,7 +2,7 @@ title: "Build Type: GitHub Actions Workflow" layout: standard hero_text: | - A [SLSA Provenance](../../provenance/v1.0) `buildType` that describes the + A [SLSA Provenance](../../provenance/v1) `buildType` that describes the execution of a GitHub Actions workflow. --- diff --git a/docs/provenance/v1.0.md b/docs/provenance/v1/index.md similarity index 95% rename from docs/provenance/v1.0.md rename to docs/provenance/v1/index.md index 3c8b77314..998fb9024 100644 --- a/docs/provenance/v1.0.md +++ b/docs/provenance/v1/index.md @@ -3,6 +3,10 @@ title: Provenance layout: standard hero_text: To trace software back to the source and define the moving parts in a complex supply chain, provenance needs to be there from the very beginning. 
It’s the verifiable information about software artifacts describing where, when and how something was produced. For higher SLSA levels and more resilient integrity guarantees, provenance requirements are stricter and need a deeper, more technical understanding of the predicate. --- + ## Purpose @@ -25,7 +29,7 @@ and the larger [in-toto attestation] framework. Provenance is an attestation that the `builder` produced the `subject` software artifacts through execution of the `buildDefinition`. -![Build Model](v1_model.svg) +![Build Model](model.svg) The model is as follows: @@ -93,7 +97,7 @@ This predicate follows the in-toto attestation [parsing rules]. Summary: > cue schema. ```javascript -{% include_relative v1.0.cue %} +{% include_relative provenance.cue %} ``` ### Fields @@ -105,7 +109,7 @@ of the other top-level fields, such as `subject`, see [Statement]._ > than a simple inclusion of the raw schema file. ```proto -{% include_relative v1.0.proto %} +{% include_relative provenance.proto %} ``` ## Index of build types diff --git a/docs/provenance/v1_model.svg b/docs/provenance/v1/model.svg similarity index 100% rename from docs/provenance/v1_model.svg rename to docs/provenance/v1/model.svg diff --git a/docs/provenance/v1.0.cue b/docs/provenance/v1/provenance.cue similarity index 100% rename from docs/provenance/v1.0.cue rename to docs/provenance/v1/provenance.cue diff --git a/docs/provenance/v1.0.proto b/docs/provenance/v1/provenance.proto similarity index 100% rename from docs/provenance/v1.0.proto rename to docs/provenance/v1/provenance.proto diff --git a/docs/spec/v1.0/requirements.md b/docs/spec/v1.0/requirements.md index 8edf117f7..272448717 100644 --- a/docs/spec/v1.0/requirements.md +++ b/docs/spec/v1.0/requirements.md @@ -193,10 +193,7 @@ Provenance. - *Authenticity:* No requirements. - *Accuracy:* No requirements. -**TODO:** Link to local copy of provenance (../../provenance/v1.0.md) -once [#525](https://github.com/slsa-framework/slsa/pull/525) is merged. 
- -[SLSA Provenance]: https://deploy-preview-525--slsa.netlify.app/provenance/v1.0 +[SLSA Provenance]: ../../provenance/v1/index.md ✓✓✓ Provenance Authenticated From 31094f7e8d4dab9c7e29b600e1cc623fd42a5963 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Wed, 18 Jan 2023 11:49:53 -0500 Subject: [PATCH 38/44] WIP: move to Markdown Signed-off-by: Mark Lodato --- docs/provenance/v1/index.md | 257 +++++++++++++++++++++++++++- docs/provenance/v1/provenance.proto | 236 ++----------------------- 2 files changed, 265 insertions(+), 228 deletions(-) diff --git a/docs/provenance/v1/index.md b/docs/provenance/v1/index.md index 998fb9024..403ca9ffd 100644 --- a/docs/provenance/v1/index.md +++ b/docs/provenance/v1/index.md @@ -93,25 +93,266 @@ This predicate follows the in-toto attestation [parsing rules]. Summary: ## Schema -> **TODO:** Get proper syntax highlighting for cue, and explain that this is a -> cue schema. +_NOTE: This section describes the fields within `predicate`. For a description +of the other top-level fields, such as `subject`, see [Statement]._ + + ```javascript {% include_relative provenance.cue %} ``` -### Fields +
+Protocol buffer schema -_NOTE: This section describes the fields within `predicate`. For a description -of the other top-level fields, such as `subject`, see [Statement]._ - -> **TODO:** Automatically parse the proto and render it directly here, rather -> than a simple inclusion of the raw schema file. +Link: [provenance.proto](provenance.proto) ```proto {% include_relative provenance.proto %} ``` +
+ +### Provenance + +[Provenance]: #provenance + +REQUIRED FIELDS: `buildDefinition`, `runDetails` + + +`buildDefinition` _object ([BuildDefinition])_ + +> The input to the build. +> +> The accuracy and completeness of this information is implied by +> `runDetails.builder.id`. + + +`runDetails` _object ([RunDetails])_ + +> Details specific to this particular execution of the build. + +### BuildDefinition + +[BuildDefinition]: #builddefinition + +REQUIRED FIELDS: `buildType`, `externalParameters` + + +`buildDefinition.buildType` _string ([TypeURI])_ + +> [TypeURI] indicating how to unambiguously interpret this message and +> initiate the build. +> +> This SHOULD resolve to a human-readable specification that includes: +> +> - Overall description. +> - List of all parameters, including: name, description, external vs system, +> type (artifact vs scalar vs...), required vs optional. +> - Explicit, unambiguous instructions for how to initiate the build given +> this message. +> - Complete example provenance file. + + +`buildDefinition.externalParameters` _map (string→[ParameterValue])_ + +> The set of top-level external inputs to the build. This SHOULD contain all +> the information necessary and sufficient to initialize the build and begin +> execution. "Top-level" means that it is not derived from another input. +> +> The key is a name whose interpretation depends on `buildType`. The key MUST be +> unique across `externalParameters` and `systemParameters`. The following +> conventional names are RECOMMENDED when appropriate: +> +> name | description +> -------- | ----------- +> `source` | The primary input to the build. +> `config` | The build configuration, if different from `source`. +> +> The build system SHOULD be designed to minimize the amount of information +> necessary here, in order to reduce fragility and ease verification. 
+> Consumers SHOULD have an expectation of what "good" looks like; the more +> information that they must check, the harder that task becomes. +> +> Guidelines: +> +> - Maximize the amount of information that is implicit from the meaning of +> `buildType`. In particular, any value that is boilerplate and the same +> for every build SHOULD be implicit. +> +> - Reduce parameters by moving configuration to input artifacts whenever +> possible. For example, instead of passing in compiler flags via a +> parameter, require them to live next to the source code or build +> configuration. +> +> - If possible, architect the build system to use this definition as its +> sole top-level input, in order to guarantee that the information is +> sufficient to run the build. +> +> - In some cases, the build configuration is evaluated client-side and +> sent over the wire, such that the build system cannot determine its +> origin. In those cases, the build system SHOULD serialize the +> configuration in a deterministic way and record the `digest` without a +> `uri`. This allows one to consider the client-side evaluation as a +> separate "build" with its own provenance, such that the verifier can +> chain the two provenance attestations together to determine the origin +> of the configuration. +> +> TODO: Describe how complete this must be at each SLSA level. +> +> TODO: Some requirement that the builder verifies the URI and that the +> verifier checks it against expectations? + + +`buildDefinition.systemParameters` _map (string→[ParameterValue])_ + +> Parameters of the build environment that were provided by the `builder` and +> not under external control. The primary intention of this field is for +> debugging, incident response, and vulnerability management. The values here +> MAY be necessary for reproducing the build. + + +`buildDefinition.resolvedDependencies` _array ([ArtifactReference])_ + +> Resolved dependencies needed at build time. 
For example, if the build +> script fetches and executes "example.com/foo.sh", which in turn fetches +> "example.com/bar.tar.gz", then both "foo.sh" and "bar.tar.gz" should be +> listed here. +> +> Any artifacts listed under `externalParameters` or `systemParameters` +> SHOULD NOT be repeated here. +> +> TODO: Explain what the purpose of this field is. Why do we need it? \ +> TODO: Explain how to determine what goes here. \ +> TODO: Explain that it's OK for it to be incomplete. \ +> TODO: If the dep is already pinned, does it need to be listed here? \ +> TODO: Should this be a map instead of an array? Then each MUST be named +> explicitly, which would be less ambiguous but more difficult. \ +> TODO: Clarify when something should go here vs builderDependencies. The +> choice is not obvious. More examples might help. + +### ParameterValue + +[ParameterValue]: #parametervalue + +**Exactly one** of the fields MUST be set. + + +`artifact` _object ([ArtifactReference])_ + +> A reference to an artifact. + + +`value` _string_ + +> A scalar value. For simplicity, only string values are supported. + +### ArtifactReference + +[ArtifactReference]: #artifactreference + +Either `uri` or `digest` is REQUIRED. + + +`uri` _string (URI)_ + +> [URI] describing where this artifact came from. When possible, this SHOULD +> be a universal and stable identifier, such as a source location or Package +> URL ([purl]). +> +> Example: `pkg:pypi/pyyaml@6.0` + + +`digest` _string ([DigestSet])_ + +> [DigestSet] of cryptographic digests for the contents of this artifact. +> +> TODO: Decide on hex vs base64 in #533 then document it here. + + +`localName` _string (URI), OPTIONAL_ + +> The name for this artifact local to the build. +> +> Example: `PyYAML-6.0.tar.gz` + + +`downloadLocation` _string (URI), OPTIONAL_ + +> [URI] identifying the location that this artifact was downloaded from, if +> different and not derivable from `uri`. 
+> +> Example: `https://files.pythonhosted.org/packages/36/2b/61d51a2c4f25ef062ae3f74576b01638bebad5e045f747ff12643df63844/PyYAML-6.0.tar.gz` + + +`mediaType` _string ([MediaType]), OPTIONAL_ + +> Media Type (aka MIME type) of this artifact. + +### RunDetails + +[RunDetails]: #rundetails + +> TODO: The following fields are the same as v0.2: +> +> REQUIRED at SLSA Build L1 unless the `id` is implicit from the attestation +> envelope (e.g. public key). + Builder builder = 1; + +> TODO: description +> OPTIONAL. + BuildMetadata metadata = 2; + +> Additional artifacts generated during the build that should not be +> considered the "output" of the build but that may be needed during +> debugging or incident response. +> +> Possible use cases: +> +> - Logs generated during the build. +> - Fully evaluated build configuration. +> +> In most cases, this SHOULD NOT contain all intermediate files generated +> during the build. Instead, this should only contain files that are likely +> to be useful later and that cannot be easily reproduced. +> +> TODO: Do we need some recommendation for how to distinguish between +> byproducts? For example, should we recommend using `localName`? +> +> OPTIONAL. + repeated ArtifactReference byproducts = 3; + +### Builder + +[Builder]: #builder + +> [URI] ... (same as v0.2) +> TODO: In most cases this is implicit from the envelope layer (e.g. the +> public key or x.509 certificate), which is just one more thing to mess up. +> Can we rescope this to avoid the duplication and thus the security concern? +> For example, if the envelope identifies the build system, this might +> identify the tenant project? +> +> REQUIRED at SLSA Build L1 unless it is implicit from the attestation +> envelope (e.g. public key). + string id = 1; + +> TODO: Do we want to add this field? (#319) +> TODO: Should we merge this with builderDependencies into a combined +> "builderParameters"? Then arbitrary information can be stored. +> +> OPTIONAL. 
+ map version = 2; + +> Dependencies used by the orchestrator that are not run within the workload +> and that should not affect the build, but may affect the provenance +> generation or security guarantees. +> TODO: Flesh out this model more. +> +> OPTIONAL. + repeated ArtifactReference builder_dependencies = 3; + ## Index of build types The following is an partial index of build type definitions. Each contains a diff --git a/docs/provenance/v1/provenance.proto b/docs/provenance/v1/provenance.proto index 7a8bafa1a..03b04c916 100644 --- a/docs/provenance/v1/provenance.proto +++ b/docs/provenance/v1/provenance.proto @@ -5,255 +5,51 @@ package slsa.v1; import "google/protobuf/struct.proto"; import "google/protobuf/timestamp.proto"; -// Note on REQUIRED vs OPTIONAL: an unqualified "REQUIRED" means that the field -// is mandatory to be semantically valid, while "REQUIRED at SLSA Build L1" -// means that the field is only required to be considered SLSA Build L1. In -// other words, a provenance file that omits a field marked "REQUIRED at SLSA -// Build L1" is valid but SLSA Build L0. -// -// Design guidelines: -// -// - Use lowerCamelCase for field names because that is how the JSON fields -// are named. The proto style guide advises use of snake_case in the .proto -// file but lowerCamelCase in JSON, but we believe that is confusing for -// readers (since we always serialize to JSON). It also makes it harder to -// reference other fields in comments since there are two ways to reference -// a field. -// -// - Prefer field names that are unique throughout the file, even if somewhat -// redundant with the parent field. This makes it easier to reference in -// documentation and reduces confusion. +// NOTE: While this file uses snake_case as per the Protocol Buffers Style Guide, the +// provenance is always serialized using JSON with lowerCamelCase. Protobuf +// tooling performs this case conversion automatically. message Provenance { - // The input to the build.
- // - // The accuracy and completeness of this information is implied by - // `runDetails.builder.id`. - // - // REQUIRED at SLSA Build L1. - BuildDefinition buildDefinition = 1; - - // Details specific to this particular execution of the build. - // - // REQUIRED at SLSA Build L1. - RunDetails runDetails = 2; + BuildDefinition build_definition = 1; + RunDetails run_details = 2; } message BuildDefinition { - // [TypeURI] indicating how to unambiguously interpret this message and - // initiate the build. - // - // This SHOULD resolve to a human-readable specification that includes: - // - // - Overall description. - // - List of all parameters, including: - // - name - // - description - // - external vs system - // - artifact vs value - // - required vs optional - // - any other restrictions - // - Explicit, unambiguous instructions for how to initiate the build given - // this message. - // - Complete example provenance file. - // - // REQUIRED. - string buildType = 1; - - // The set of top-level external inputs to the build. This SHOULD contain all - // the information necessary and sufficient to initialize the build and begin - // execution. "Top-level" means that it is not derived from another input. - // - // The key is a name whose interpretation depends on `buildType`. It MUST be - // unique across `externalParameters` and `systemParameters`. The following - // conventional names are RECOMMENDED when appropriate: - // - // name | description - // -------- | ----------- - // `source` | The primary input to the build. - // `config` | The build configuration, if different from `source`. - // - // The build system SHOULD be designed to minimize the amount of information - // necessary here, in order to reduce fragility and ease verification. - // Consumers SHOULD have an expectation of what "good" looks like; the more - // information that they must check, the harder that task becomes. 
- // - // Guidelines: - // - // - Maximize the amount of information that is implicit from the meaning of - // `buildType`. In particular, any value that is boilerplate and the same - // for every build SHOULD be implicit. - // - // - Reduce parameters by moving configuration to input artifacts whenever - // possible. For example, instead of passing in compiler flags via a - // parameter, require them to live next to the source code or build - // configuration. - // - // - If possible, architect the build system to use this definition as its - // sole top-level input, in order to guarantee that the information is - // sufficient to run the build. - // - // - In some cases, the build configuration is evaluated client-side and - // sent over the wire, such that the build system cannot determine its - // origin. In those cases, the build system SHOULD serialize the - // configuration in a deterministic way and record the `digest` without a - // `uri`. This allows one to consider the client-side evaluation as a - // separate "build" with its own provenance, such that the verifier can - // chain the two provenance attestations together to determine the origin - // of the configuration. - // - // TODO: Describe how complete this must be at each SLSA level. - // - // TODO: Some requirement that the builder verifies the URI and that the - // verifier checks it against expectations? - // - // REQUIRED at SLSA Build L1. - map externalParameters = 2; - - // Parameters of the build environment that were provided by the `builder` and - // not under external control. The primary intention of this field is for - // debugging, incident response, and vulnerability management. The values here - // MAY be necessary for reproducing the build. - // - // OPTIONAL. - map systemParameters = 3; - - // Resolved dependencies needed at build time. 
For example, if the build - // script fetches and executes "example.com/foo.sh", which in turn fetches - // "example.com/bar.tar.gz", then both "foo.sh" and "bar.tar.gz" should be - // listed here. - // - // Any artifacts listed under `externalParameters` or `systemParameters` - // SHOULD NOT be repeated here. - // - // TODO: Explain what the purpose of this field is. Why do we need it? - // TODO: Explain how to determine what goes here. - // TODO: Explain that it's OK for it to be incomplete. - // TODO: If the dep is already pinned, does it need to be listed here? - // TODO: Should this be a map instead of an array? Then each MUST be named - // explicitly, which would be less ambiguous but more difficult. - // TODO: Clarify when something should go here vs builderDependencies. The - // choice is not obvious. More examples might help. - // - // OPTIONAL. - repeated ArtifactReference resolvedDependencies = 1; + string build_type = 1; + map external_parameters = 2; + map system_parameters = 3; + repeated ArtifactReference resolved_dependencies = 4; } message ParameterValue { - // REQUIRED. oneof value { - // A parameter that is a reference to an artifact. ArtifactReference artifact = 1; - // A parameter that is a scalar value. For simplicity, only string values - // are supported. string value = 2; } } message ArtifactReference { - // [URI] describing where this artifact came from. When possible, this SHOULD - // be a universal and stable identifier, such as a source location or Package - // URL ([purl]). - // - // Example: `pkg:pypi/pyyaml@6.0` - // - // REQUIRED unless `digest` is set. string uri = 1; - - // [DigestSet] of cryptographic digests for the contents of this artifact. - // TODO: Decide on hex vs base64 in #533 then document it here. - // - // REQUIRED unless `uri` is set. map digest = 2; - - // The name for this artifact local to the build. - // - // Example: `PyYAML-6.0.tar.gz` - // - // OPTIONAL. 
- string localName = 3; - - // [URI] identifying the location that this artifact was downloaded from, if - // different and not derivable from `uri`. - // - // Example: `https://files.pythonhosted.org/packages/36/2b/61d51a2c4f25ef062ae3f74576b01638bebad5e045f747ff12643df63844/PyYAML-6.0.tar.gz` - // - // OPTIONAL. - string downloadLocation = 4; - - // [Media Type] (aka MIME type) of this artifact. - // - // OPTIONAL. - string mediaType = 5; + string local_name = 3; + string download_location = 4; + string media_type = 5; } message RunDetails { - // TODO: The following fields are the same as v0.2: - // - // REQUIRED at SLSA Build L1 unless the `id` is implicit from the attestation - // envelope (e.g. public key). Builder builder = 1; - - // TODO: description - // OPTIONAL. BuildMetadata metadata = 2; - - // Additional artifacts generated during the build that should not be - // considered the "output" of the build but that may be needed during - // debugging or incident response. - // - // Possible use cases: - // - // - Logs generated during the build. - // - Fully evaluated build configuration. - // - // In most cases, this SHOULD NOT contain all intermediate files generated - // during the build. Instead, this should only contain files that are likely - // to be useful later and that cannot be easily reproduced. - // - // TODO: Do we need some recommendation for how to distinguish between - // byproducts? For example, should we recommend using `localName`? - // - // OPTIONAL. repeated ArtifactReference byproducts = 3; } message Builder { - // [URI] ... (same as v0.2) - // TODO: In most cases this is implicit from the envelope layer (e.g. the - // public key or x.509 certificate), which is just one more thing to mess up. - // Can we rescope this to avoid the duplication and thus the security concern? - // For example, if the envelope identifies the build system, this might - // identify the tenant project? 
- // - // REQUIRED at SLSA Build L1 unless it is implicit from the attestation - // envelope (e.g. public key). string id = 1; - - // TODO: Do we want to add this field? (#319) - // TODO: Should we merge this with builderDependencies into a combined - // "builderParameters"? Then arbitrary information can be stored. - // - // OPTIONAL. map version = 2; - - // Dependencies used by the orchestrator that are not run within the workload - // and that should not affect the build, but may affect the provenance - // generation or security guarantees. - // TODO: Flesh out this model more. - // - // OPTIONAL. - repeated ArtifactReference builderDependencies = 3; + repeated ArtifactReference builder_dependencies = 3; } message BuildMetadata { - // TODO: same as v0.2: - // OPTIONAL. - string invocationId = 1; - - // OPTIONAL. - google.protobuf.Timestamp startedOn = 2; - - // OPTIONAL. - google.protobuf.Timestamp finishedOn = 3; + string invocation_id = 1; + google.protobuf.Timestamp started_on = 2; + google.protobuf.Timestamp finished_on = 3; } From 472ba948863c2568e44643df6e2121f4bee7cb95 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Wed, 18 Jan 2023 16:25:25 -0500 Subject: [PATCH 39/44] Finish Markdown conversion, add other param types Signed-off-by: Mark Lodato --- .../github-actions-workflow/v0.1/example.json | 29 +- docs/github-actions-workflow/v0.1/index.md | 39 +- docs/provenance/v1/index.md | 475 +++++++++++------- docs/provenance/v1/provenance.cue | 8 +- docs/provenance/v1/provenance.proto | 9 +- 5 files changed, 346 insertions(+), 214 deletions(-) diff --git a/docs/github-actions-workflow/v0.1/example.json b/docs/github-actions-workflow/v0.1/example.json index 427c3bd90..297a2a3b0 100644 --- a/docs/github-actions-workflow/v0.1/example.json +++ b/docs/github-actions-workflow/v0.1/example.json @@ -4,20 +4,35 @@ "buildDefinition": { "buildType": "https://slsa.dev/github-actions-workflow/v0.1?draft", "externalParameters": { - "inputs_build_id": { "value": "123456768" 
}, - "inputs_deploy_target": { "value": "deployment_sys_1a" }, - "inputs_perform_deploy": { "value": "true" }, + "inputs": { + "mapValue": { + "build_id": "123456768", + "deploy_target": "deployment_sys_1a", + "perform_deploy": "true" + } + }, "source": { - "artifact": { + "artifactRef": { "uri": "git+https://github.com/octocat/hello-world@refs/heads/main", "digest": { "sha1": "c27d339ee6075c1f744c5d4b200f7901aad2c369" } } }, - "workflow_path": { "value": ".github/workflow/release.yml" } + "vars": { + "mapValue": { + "mascot": "Mona" + } + }, + "workflowPath": { + "scalarValue": ".github/workflow/release.yml" + } }, "systemParameters": { - "github_actor": { "value": "MarkLodato" }, - "github_event_name": { "value": "workflow_dispatch" } + "github": { + "mapValue": { + "actor": "MarkLodato", + "event_name": "workflow_dispatch" + } + } }, "resolvedDependencies": [ { diff --git a/docs/github-actions-workflow/v0.1/index.md b/docs/github-actions-workflow/v0.1/index.md index 68fde4902..a8cc97eda 100644 --- a/docs/github-actions-workflow/v0.1/index.md +++ b/docs/github-actions-workflow/v0.1/index.md @@ -20,15 +20,15 @@ workflow, such as an action, a job, or a reusable workflow. ### External parameters -All external parameters are REQUIRED. +All external parameters are REQUIRED unless empty.
ParameterTypeDescription -
inputs_*string +
inputsmapValue -The [inputs context], with each `inputs.` renamed to `inputs_`. -Every non-empty input value MUST be recorded. Empty values SHOULD be omitted. +The [inputs context], with each value converted to string. Every non-empty input +value MUST be recorded. Empty values SHOULD be omitted. Note: Only `workflow_dispatch` events and reusable workflows have inputs. @@ -39,7 +39,12 @@ The git repository containing the top-level workflow YAML file. This can be computed from the [github context] using `"git+" + github.server_url + "/" + github.repository + "@" + github.ref`. -
workflow_pathstring +
varsmapValue + +The [vars context], with each value converted to string. Every non-empty variable +value MUST be recorded. Empty values SHOULD be omitted. + +
workflowPathstring The path to the workflow YAML file within `source`. @@ -53,22 +58,22 @@ the path. See [getEntryPoint] for one possible implementation. [github context]: https://docs.github.com/en/actions/learn-github-actions/contexts#github-context [inputs context]: https://docs.github.com/en/actions/learn-github-actions/contexts#inputs-context +[vars context]: https://docs.github.com/en/actions/learn-github-actions/contexts#vars-context ### System parameters -> TODO: None of these are really "parameters", per se, but rather metadata -> about the build. Perhaps they should go in `runDetails` instead? The problem -> is that we don't have an appropriate field for it currently. - -All system parameters are OPTIONAL. Each corresponds to the [github context] -value of the same name, with `github.` renamed to `github_`. The -list only includes parameters that are likely to have an effect on the build and -that are not already captured elsewhere. +All system parameters are OPTIONAL. | Parameter | Type | Description | | -------------------- | -------- | ----------- | -| `github_actor` | string | The username of the user that triggered the initial workflow run. | -| `github_event_name` | string | The name of the event that triggered the workflow run. | +| `github` | mapValue | A subset of the [github context] as described below. Only includes parameters that are likely to have an effect on the build and that are not already captured elsewhere. | + +The `github` map SHOULD contain the following elements: + +| GitHub Context Parameter | Description | +| ------------------------------- | ----------- | +| `github.mapValue["actor"]` | The username of the user that triggered the initial workflow run. | +| `github.mapValue["event_name"]` | The name of the event that triggered the workflow run. | > TODO: What about `actor_id`, `repository_id`, and `repository_owner_id`?
Those > are not part of the context so they're harder to describe, and the repository @@ -77,6 +82,10 @@ that are not already captured elsewhere. > Also `base_ref` and `head_ref` are similar in that they are annotations about > `source` rather than a proper parameter. +> TODO: None of these are really "parameters", per se, but rather metadata +> about the build. Perhaps they should go in `runDetails` instead? The problem +> is that we don't have an appropriate field for it currently. + ### Resolved dependencies The resolved dependencies MAY contain any artifacts known to be input to the diff --git a/docs/provenance/v1/index.md b/docs/provenance/v1/index.md index 403ca9ffd..609336dc5 100644 --- a/docs/provenance/v1/index.md +++ b/docs/provenance/v1/index.md @@ -24,6 +24,10 @@ requirements]. Understanding of SLSA [Software Attestations](/attestation-model) and the larger [in-toto attestation] framework. +The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", +"SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be +interpreted as described in [RFC 2119](https://www.rfc-editor.org/rfc/rfc2119). + ## Model Provenance is an attestation that the `builder` produced the `subject` software @@ -35,7 +39,7 @@ The model is as follows: - Each build runs as an independent process on a multi-tenant platform. The `builder` is the identity of this platform, representing the transitive - closure of all entities that must be + closure of all entities that are [trusted](../spec/v1.0/principles.md#trust-systems-verify-artifacts) to faithfully run the build and record the provenance. (Note: The same model can be used for platform-less or single-tenant build systems.) @@ -118,240 +122,339 @@ Link: [provenance.proto](provenance.proto) [Provenance]: #provenance -REQUIRED FIELDS: `buildDefinition`, `runDetails` +REQUIRED for SLSA Build L1: `buildDefinition`, `runDetails` - -`buildDefinition` _object ([BuildDefinition])_ + +
FieldTypeDescription -> The input to the build. -> -> The accuracy and completeness of this information is implied by -> `runDetails.builder.id`. +
buildDefinition +BuildDefinition + +The input to the build. The accuracy and completeness are implied by `runDetails.builder.id`. - -`runDetails` _object ([RunDetails])_ +
runDetails +RunDetails -> Details specific to this particular execution of the build. +Details specific to this particular execution of the build. + +
### BuildDefinition [BuildDefinition]: #builddefinition -REQUIRED FIELDS: `buildType`, `externalParameters` +REQUIRED for SLSA Build L1: `buildType`, `externalParameters` - -`buildDefinition.buildType` _string ([TypeURI])_ + +
FieldTypeDescription -> [TypeURI] indicating how to unambiguously interpret this message and -> initiate the build. -> -> This SHOULD resolve to a human-readable specification that includes: -> -> - Overall description. -> - List of all parameters, including: name, description, external vs system, -> type (artifact vs scalar vs...), required vs optional. -> - Explicit, unambiguous instructions for how to initiate the build given -> this message. -> - Complete example provenance file. - - -`buildDefinition.externalParameters` _map (string→[ParameterValue])_ - -> The set of top-level external inputs to the build. This SHOULD contain all -> the information necessary and sufficient to initialize the build and begin -> execution. "Top-level" means that it is not derived from another input. -> -> The key is a name whose interpretation depends on `buildType`. The key MUST be -> unique across `externalParameters` and `systemParameters`. The following -> conventional names are RECOMMENDED when appropriate: -> -> name | description -> -------- | ----------- -> `source` | The primary input to the build. -> `config` | The build configuration, if different from `source`. -> -> The build system SHOULD be designed to minimize the amount of information -> necessary here, in order to reduce fragility and ease verification. -> Consumers SHOULD have an expectation of what "good" looks like; the more -> information that they must check, the harder that task becomes. -> -> Guidelines: -> -> - Maximize the amount of information that is implicit from the meaning of -> `buildType`. In particular, any value that is boilerplate and the same -> for every build SHOULD be implicit. -> -> - Reduce parameters by moving configuration to input artifacts whenever -> possible. For example, instead of passing in compiler flags via a -> parameter, require them to live next to the source code or build -> configuration. 
-> -> - If possible, architect the build system to use this definition as its -> sole top-level input, in order to guarantee that the information is -> sufficient to run the build. -> -> - In some cases, the build configuration is evaluated client-side and -> sent over the wire, such that the build system cannot determine its -> origin. In those cases, the build system SHOULD serialize the -> configuration in a deterministic way and record the `digest` without a -> `uri`. This allows one to consider the client-side evaluation as a -> separate "build" with its own provenance, such that the verifier can -> chain the two provenance attestations together to determine the origin -> of the configuration. -> -> TODO: Describe how complete this must be at each SLSA level. -> -> TODO: Some requirement that the builder verifies the URI and that the -> verifier checks it against expectations? +
buildType +string (TypeURI) - -`buildDefinition.systemParameters` _map (string→[ParameterValue])_ +Identifies the template for how to perform the build and interpret the +parameters and dependencies. -> Parameters of the build environment that were provided by the `builder` and -> not under external control. The primary intention of this field is for -> debugging, incident response, and vulnerability management. The values here -> MAY be necessary for reproducing the build. +The URI SHOULD resolve to a human-readable specification that includes: overall +description of the build type; a list of all parameters (name, description, +external vs system, artifact vs scalar vs..., required vs optional, etc.); +unambiguous instructions for how to initiate the build given this +BuildDefinition, and a complete example. Example: +https://slsa.dev/github-actions-workflow/v0.1 - -`buildDefinition.resolvedDependencies` _array ([ArtifactReference])_ +
externalParameters +map (string→ParameterValue) -> Resolved dependencies needed at build time. For example, if the build -> script fetches and executes "example.com/foo.sh", which in turn fetches -> "example.com/bar.tar.gz", then both "foo.sh" and "bar.tar.gz" should be -> listed here. -> -> Any artifacts listed under `externalParameters` or `systemParameters` -> SHOULD NOT be repeated here. -> -> TODO: Explain what the purpose of this field is. Why do we need it? \ -> TODO: Explain how to determine what goes here. \ -> TODO: Explain that it's OK for it to be incomplete. \ -> TODO: If the dep is already pinned, does it need to be listed here? \ -> TODO: Should this be a map instead of an array? Then each MUST be named -> explicitly, which would be less ambiguous but more difficult. \ -> TODO: Clarify when something should go here vs builderDependencies. The -> choice is not obvious. More examples might help. +The parameters that are under external control, such as those set by a user or +tenant of the build system. They MUST be complete at SLSA Build L3, meaning that +there is no additional mechanism for an external party to influence the +build. (At lower SLSA Build levels, the completeness MAY be best effort.) + +The build system SHOULD be designed to minimize the size and complexity of +`externalParameters`, in order to reduce fragility and ease [verification]. +Consumers SHOULD have an expectation of what "good" looks like; the more +information that they need to check, the harder that task becomes. + +
systemParameters +map (string→ParameterValue) + +The parameters that are under the control of the `builder`. The primary +intention of this field is for debugging, incident response, and vulnerability +management. The values here MAY be necessary for reproducing the build. There is +no need to [verify][Verification] these parameters because the build system is +already trusted, and in many cases it is not practical to do so. + +
resolvedDependencies +array (ArtifactReference) + +Collection of artifacts needed at build time, aside from those listed in +`externalParameters` or `systemParameters`. For example, if the build script +fetches and executes "example.com/foo.sh", which in turn fetches +"example.com/bar.tar.gz", then both "foo.sh" and "bar.tar.gz" should be listed +here. + +
+ +The BuildDefinition describes all of the inputs to the build. It SHOULD contain +all the information necessary and sufficient to initialize the build and begin +execution. + +The `externalParameters` and `systemParameters` are the top-level inputs to the +template, meaning inputs not derived from another input. Each field is a map +from parameter name to [parameter value][ParameterValue]. Each parameter +name MUST be unique across `externalParameters` and `systemParameters`. The +following conventional names are RECOMMENDED when appropriate: + +- `source`: The primary input to the build. +- `config`: The build configuration, if different from `source`. + + +Guidelines: + +- Maximize the amount of information that is implicit from the meaning of + `buildType`. In particular, any value that is boilerplate and the same + for every build SHOULD be implicit. + +- Reduce parameters by moving configuration to input artifacts whenever + possible. For example, instead of passing in compiler flags via an external + parameter that has to be [verified][Verification] separately, require the + flags to live next to the source code or build configuration so that + verifying the latter automatically verifies the compiler flags. + +- If possible, architect the build system to use this definition as its + sole top-level input, in order to guarantee that the information is + sufficient to run the build. + +- In some cases, the build configuration is evaluated client-side and + sent over the wire, such that the build system cannot determine its + origin. In those cases, the build system SHOULD serialize the + configuration in a deterministic way and record the `digest` without a + `uri`. This allows one to consider the client-side evaluation as a + separate "build" with its own provenance, such that the verifier can + chain the two provenance attestations together to determine the origin + of the configuration. + +**TODO:** Explain the purpose of `resolvedDependencies`. 
Why do we need it? What +goes in it? Is it OK for it to be incomplete? If a dependency is already pinned, +does it need to be listed? How does one choose between `resolvedDependencies` +and `builderDependencies`? ### ParameterValue [ParameterValue]: #parametervalue -**Exactly one** of the fields MUST be set. - - -`artifact` _object ([ArtifactReference])_ +REQUIRED: exactly one of the fields MUST be set. -> A reference to an artifact. +| Field | Type | Description | +| ----- | ---- | ----------- | +| `artifactRef` | [ArtifactReference] | Reference to an artifact. | +| `scalarValue` | string | Scalar value. | +| `mapValue` | map (string→string) | Unordered collection of name/value pairs. | +| `arrayValue` | array (string) | Ordered collection of values. | - -`value` _string_ +For simplicity, only string values or collections of string values are +supported. -> A scalar value. For simplicity, only string values are supported. +> ⚠ **RFC:** The design of parameters is still not settled. We welcome feedback +> on this particular design and suggestions for alternatives. In particular: +> +> - How restrictive should we be? This is somewhat of a balance between making +> it easier for the builder vs [verifier][Verification]. A very restrictive +> type, such as only strings, makes it easier to set expectations but harder +> for a builder to describe reality. A very open type, such as an arbitrary +> JSON object, provides a lot of freedom to builders but possibly at the +> cost of complexity in terms of expectations. +> - Is there a better way to express types than using field names? +> - Do we need [ArtifactReference]? Would it instead make sense to just have +> the raw parameter here and then represent the digest in +> `resolvedDependencies`? What is the specific use case? 
+> +> Alternatives considered so far: +> +> - Only allow strings (difficult for many builders) +> - Allow strings, maps of strings, or arrays of strings (current design) +> - Allow arbitrary JSON (challenge: how do we do [ArtifactReference]?) ### ArtifactReference [ArtifactReference]: #artifactreference -Either `uri` or `digest` is REQUIRED. +REQUIRED: at least one of `uri` or `digest` - -`uri` _string (URI)_ + +
FieldTypeDescription -> [URI] describing where this artifact came from. When possible, this SHOULD -> be a universal and stable identifier, such as a source location or Package -> URL ([purl]). -> -> Example: `pkg:pypi/pyyaml@6.0` +
uri +string (URI) - -`digest` _string ([DigestSet])_ +URI describing where this artifact came from. When possible, this SHOULD +be a universal and stable identifier, such as a source location or Package +URL ([purl]). Example: `pkg:pypi/pyyaml@6.0` -> [DigestSet] of cryptographic digests for the contents of this artifact. -> -> TODO: Decide on hex vs base64 in #533 then document it here. +
digest +DigestSet - -`localName` _string (URI), OPTIONAL_ +One or more cryptographic digests of the contents of this artifact. -> The name for this artifact local to the build. -> -> Example: `PyYAML-6.0.tar.gz` +TODO: Decide on hex vs base64 in #533 then document it here. - -`downloadLocation` _string (URI), OPTIONAL_ +
localName +string -> [URI] identifying the location that this artifact was downloaded from, if -> different and not derivable from `uri`. -> -> Example: `https://files.pythonhosted.org/packages/36/2b/61d51a2c4f25ef062ae3f74576b01638bebad5e045f747ff12643df63844/PyYAML-6.0.tar.gz` +The name for this artifact local to the build. Example: `PyYAML-6.0.tar.gz` + +
downloadLocation +string (URI) + +URI identifying the location that this artifact was downloaded from, if +different and not derivable from `uri`. Example: +`https://files.pythonhosted.org/packages/36/2b/61d51a2c4f25ef062ae3f74576b01638bebad5e045f747ff12643df63844/PyYAML-6.0.tar.gz` + +
mediaType +string (MediaType) + +Media type (aka MIME type) of this artifact. - -`mediaType` _string ([MediaType]), OPTIONAL_ +
-> Media Type (aka MIME type) of this artifact. +> ⚠ **RFC:** Do we need all these fields? Is this adding too much complexity? ### RunDetails [RunDetails]: #rundetails -> TODO: The following fields are the same as v0.2: -> -> REQUIRED at SLSA Build L1 unless the `id` is implicit from the attestation -> envelope (e.g. public key). - Builder builder = 1; +REQUIRED for SLSA Build L1: `builder` (unless `id` is implicit from the +attestation envelope) -> TODO: description -> OPTIONAL. - BuildMetadata metadata = 2; + +
FieldTypeDescription -> Additional artifacts generated during the build that should not be -> considered the "output" of the build but that may be needed during -> debugging or incident response. -> -> Possible use cases: -> -> - Logs generated during the build. -> - Fully evaluated build configuration. -> -> In most cases, this SHOULD NOT contain all intermediate files generated -> during the build. Instead, this should only contain files that are likely -> to be useful later and that cannot be easily reproduced. -> -> TODO: Do we need some recommendation for how to distinguish between -> byproducts? For example, should we recommend using `localName`? -> -> OPTIONAL. - repeated ArtifactReference byproducts = 3; +
builder +Builder + +Identifies the entity that executed the invocation, which is trusted to have +correctly performed the operation and populated this provenance. + +
metadata +BuildMetadata + +Metadata about this particular execution of the build. + +
byproducts +array (ArtifactReference) + +Additional artifacts generated during the build that should not be considered +the "output" of the build but that may be needed during debugging or incident +response. For example, this might reference logs generated during the build +and/or a digest of the fully evaluated build configuration. + +In most cases, this SHOULD NOT contain all intermediate files generated during +the build. Instead, this should only contain files that are likely to be useful +later and that cannot be easily reproduced. + +**TODO:** Do we need some recommendation for how to distinguish between +byproducts? For example, should we recommend using `localName`? + +
### Builder [Builder]: #builder -> [URI] ... (same as v0.2) -> TODO: In most cases this is implicit from the envelope layer (e.g. the -> public key or x.509 certificate), which is just one more thing to mess up. -> Can we rescope this to avoid the duplication and thus the security concern? -> For example, if the envelope identifies the build system, this might -> identify the tenant project? -> -> REQUIRED at SLSA Build L1 unless it is implicit from the attestation -> envelope (e.g. public key). - string id = 1; +REQUIRED for SLSA Build L1: `id` (unless implicit from the attestation envelope) -> TODO: Do we want to add this field? (#319) -> TODO: Should we merge this with builderDependencies into a combined -> "builderParameters"? Then arbitrary information can be stored. -> -> OPTIONAL. - map version = 2; + +
FieldTypeDescription -> Dependencies used by the orchestrator that are not run within the workload -> and that should not affect the build, but may affect the provenance -> generation or security guarantees. -> TODO: Flesh out this model more. -> -> OPTIONAL. - repeated ArtifactReference builder_dependencies = 3; +
id +string (TypeURI) + +URI indicating the transitive closure of the trusted builder. + +**TODO:** In most cases this is implicit from the envelope layer (e.g. the +public key or X.509 certificate), so duplicating it here is just one more thing to mess up. Can +we rescope this to avoid the duplication and thus the security concern? For +example, if the envelope identifies the build system, this might identify the +tenant project? + +**TODO:** Provide guidance on how to choose a URI, what scope it should have, +stability, how [verification] works, etc. + +
version +map (string→string) + +Version numbers of components of the builder. + +
builderDependencies +array (ArtifactReference) + +Dependencies used by the orchestrator that are not run within the workload and +that should not affect the build, but may affect the provenance generation or +security guarantees. + +**TODO:** Flesh out this model more. + +
+ +The builder represents the transitive closure of all the entities that are, by +necessity, [trusted](../spec/v1.0/principles.md#trust-systems-verify-artifacts) +to faithfully run the build and record the provenance. + +The `id` MUST reflect the trust base that consumers care about. How detailed to +be is a judgement call. For example, GitHub Actions supports both GitHub-hosted +runners and self-hosted runners. The GitHub-hosted runner might be a single +identity because it's all GitHub from the consumer's perspective. Meanwhile, +each self-hosted runner might have its own identity because not all runners are +trusted by all consumers. + +Consumers MUST accept only specific signer-builder pairs. For example, "GitHub" +can sign provenance for the "GitHub Actions" builder, and "Google" can sign +provenance for the "Google Cloud Build" builder, but "GitHub" cannot sign for +the "Google Cloud Build" builder. + +Design rationale: The builder is distinct from the signer because one signer +may generate attestations for more than one builder, as in the GitHub Actions +example above. The field is required, even if it is implicit from the signer, +to aid readability and debugging. It is an object to allow additional fields +in the future, in case one URI is not sufficient. + +> ⚠ **RFC:** Should we just allow builders to set arbitrary properties? We don't +> expect verifiers to use any of them, so maybe that's the simpler approach? Or +> have a `properties` that is an arbitrary object? (#319) + +### BuildMetadata + +[BuildMetadata]: #buildmetadata + +REQUIRED: (none) + + +
FieldTypeDescription + +
invocationId +string + +Identifies this particular build invocation, which can be useful for finding +associated logs or other ad-hoc analysis. The exact meaning and format are +defined by `builder.id`; by default it is treated as opaque and case-sensitive. +The value SHOULD be globally unique. + +
startedOn +string (Timestamp) + +The timestamp of when the build started. + +
finishedOn +string (Timestamp) + +The timestamp of when the build completed. + +
+ +## Verification + +[Verification]: #verification + +> **TODO:** Describe how clients are expected to verify the provenance. ## Index of build types @@ -379,7 +482,7 @@ meaning of each field is unchanged unless otherwise noted. // If the old `configSource` was the sole top-level input, // (i.e. containing the source or a pointer to the source): "source": { - "artifact": { + "artifactRef": { "uri": old.invocation.configSource.uri, "digest": old.invocation.configSource.digest, }, @@ -388,10 +491,10 @@ meaning of each field is unchanged unless otherwise noted. // If the old `configSource` contained just build configuration // and a separate top-level input contained the source: "source": { - "artifact": old.materials[indexOfSource], + "artifactRef": old.materials[indexOfSource], }, "config": { - "artifact": { + "artifactRef": { "uri": old.invocation.configSource.uri, "digest": old.invocation.configSource.digest, }, diff --git a/docs/provenance/v1/provenance.cue b/docs/provenance/v1/provenance.cue index a5f9624ae..ec46c9ca7 100644 --- a/docs/provenance/v1/provenance.cue +++ b/docs/provenance/v1/provenance.cue @@ -29,9 +29,13 @@ } #ParameterValue: { - "artifact": #ArtifactReference + "artifactRef": #ArtifactReference } | { - "value": string + "scalarValue": string +} | { + "mapValue": { [string]: string } +} | { + "arrayValue": [ ...string ] } #ArtifactReference: { diff --git a/docs/provenance/v1/provenance.proto b/docs/provenance/v1/provenance.proto index 03b04c916..f77ff8158 100644 --- a/docs/provenance/v1/provenance.proto +++ b/docs/provenance/v1/provenance.proto @@ -22,10 +22,11 @@ message BuildDefinition { } message ParameterValue { - oneof value { - ArtifactReference artifact = 1; - string value = 2; - } + // Logically a oneof, but oneof doesn't support repeated or map. 
+ ArtifactReference artifact_ref = 1; + string scalar_value = 2; + map<string, string> map_value = 3; + repeated string array_value = 4; } message ArtifactReference { From fdce758e2cf99166664f775f1bc861408a49f00e Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Wed, 18 Jan 2023 16:42:13 -0500 Subject: [PATCH 40/44] address comments Signed-off-by: Mark Lodato --- docs/provenance/v1/index.md | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/docs/provenance/v1/index.md b/docs/provenance/v1/index.md index 609336dc5..94447b4ce 100644 --- a/docs/provenance/v1/index.md +++ b/docs/provenance/v1/index.md @@ -100,8 +100,10 @@ This predicate follows the in-toto attestation [parsing rules]. Summary: _NOTE: This section describes the fields within `predicate`. For a description of the other top-level fields, such as `subject`, see [Statement]._ - + ```javascript {% include_relative provenance.cue %} @@ -293,9 +295,10 @@ URL ([purl]). Example: `pkg:pypi/pyyaml@6.0`
digest DigestSet -One or more cryptographic digests of the contents of this artifact. +One or more cryptographic digests of the contents of this artifact. Example: +`{"sha256": "5f0689d54944564971f2811f9788218bfafb21aa20f532e6490004377dfa648f"}` -TODO: Decide on hex vs base64 in #533 then document it here. +**TODO:** Decide on hex vs base64 in #533 then document it here.
localName string @@ -312,7 +315,8 @@ different and not derivable from `uri`. Example:
mediaType string (MediaType) -Media type (aka MIME type) of this artifact was interpreted. +Media type (aka MIME type) of this artifact. Example: +`application/json`
@@ -417,9 +421,18 @@ example above. The field is required, even if it is implicit from the signer, to aid readability and debugging. It is an object to allow additional fields in the future, in case one URI is not sufficient. -> ⚠ **RFC:** Should we just allow builders to set arbitrary properties? We don't -> expect verifiers to use any of them, so maybe that's the simpler approach? Or -> have a `properties` that is an arbitrary object? (#319) +> ⚠ **RFC:** Should we just allow builders to set arbitrary properties, rather +> than calling out `version` and `builderDependencies`? We don't expect +> verifiers to use any of them, so maybe that's the simpler approach? Or have a +> `properties` that is an arbitrary object? (#319) + +> ⚠ **RFC:** Do we want/need to identify the tenant of the build system, +> separately from the build system itself? If so, should it be a single `id` +> that combines both (e.g. +> `https://builder.example/tenants/company1.example/project1`), or two separate +> fields (e.g. `{"id": "https://builder.example", "tenant": +> "https://company1.example/project1"}`)? What would the use case be for this? +> How should [verification] work? ### BuildMetadata From aba878eea6f3b8da965bfe9e0ee521d0124ed880 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Wed, 18 Jan 2023 16:49:01 -0500 Subject: [PATCH 41/44] fix mdlint Signed-off-by: Mark Lodato --- docs/provenance/v1/index.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/provenance/v1/index.md b/docs/provenance/v1/index.md index 94447b4ce..232abff67 100644 --- a/docs/provenance/v1/index.md +++ b/docs/provenance/v1/index.md @@ -209,7 +209,6 @@ following conventional names are RECOMMENDED when appropriate: - `source`: The primary input to the build. - `config`: The build configuration, if different from `source`. 
- Guidelines: - Maximize the amount of information that is implicit from the meaning of From 1987abff2f7fb4fc93b2a723616aeb231113108f Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Thu, 19 Jan 2023 11:30:55 -0500 Subject: [PATCH 42/44] add TODO about creating other build types Signed-off-by: Mark Lodato --- docs/provenance/v1/index.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/provenance/v1/index.md b/docs/provenance/v1/index.md index 232abff67..7a4d85517 100644 --- a/docs/provenance/v1/index.md +++ b/docs/provenance/v1/index.md @@ -475,6 +475,9 @@ complete example predicate. - [GitHub Actions Workflow](../../github-actions-workflow/v0.1/) +**TODO:** Before marking the spec stable, add at least 1-2 other build types to +validate that the design is general enough to apply to other builders. + ## Migrating from 0.2 To migrate from [version 0.2][0.2] (`old`), use the following pseudocode. The From 40aeb7715c9a75629f1e449d2b009a81972ff089 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Fri, 20 Jan 2023 10:48:28 -0500 Subject: [PATCH 43/44] Fix typos in provenance v1.0 Signed-off-by: Mark Lodato --- docs/provenance/v1/index.md | 73 ++++++++++++++++--------------- docs/provenance/v1/provenance.cue | 2 +- 2 files changed, 39 insertions(+), 36 deletions(-) diff --git a/docs/provenance/v1/index.md b/docs/provenance/v1/index.md index 7a4d85517..14f8a2a38 100644 --- a/docs/provenance/v1/index.md +++ b/docs/provenance/v1/index.md @@ -17,7 +17,7 @@ Describe how an artifact or set of artifacts was produced so that: - Others can rebuild the artifact, if desired. This predicate is the recommended way to satisfy the SLSA [provenance -requirements]. +requirements](/spec/v1.0/requirements#provenance-generation). ## Prerequisite @@ -39,10 +39,9 @@ The model is as follows: - Each build runs as an independent process on a multi-tenant platform. 
The `builder` is the identity of this platform, representing the transitive - closure of all entities that are - [trusted](../spec/v1.0/principles.md#trust-systems-verify-artifacts) to - faithfully run the build and record the provenance. (Note: The same model - can be used for platform-less or single-tenant build systems.) + closure of all entities that are [trusted] to faithfully run the build and + record the provenance. (Note: The same model can be used for platform-less + or single-tenant build systems.) - The build process is defined by a parameterized template, identified by `buildType`. Often a build platform only supports a single build type. For @@ -73,14 +72,14 @@ The model is as follows: - During execution, the build process MAY communicate with the build platform's control plane and/or build caches. This communication is not captured in the provenance but is subject to [SLSA - Requirements](../spec/v1.0/requirements.md). + Requirements](/spec/v1.0/requirements). - Finally, the build process outputs one or more artifacts, identified by `subject`. For concrete examples, see [index of build types](#index-of-build-types). -> **TODO:** Align with the [Build model](../spec/v1.0/terminology.md). +> **TODO:** Align with the [Build model](/spec/v1.0/terminology#build-model). ## Parsing rules @@ -130,12 +129,12 @@ REQUIRED for SLSA Build L1: `buildDefinition`, `runDetails` FieldTypeDescription buildDefinition -BuildDefinition +BuildDefinition The input to the build. The accuracy and completeness are implied by `runDetails.builder.id`. runDetails -RunDetails +RunDetails Details specific to this particular execution of the build. @@ -164,7 +163,7 @@ BuildDefinition, and a complete example. Example: https://slsa.dev/github-actions-workflow/v0.1 externalParameters -map (string→ParameterValue) +map (string→ParameterValue) The parameters that are under external control, such as those set by a user or tenant of the build system. 
They MUST be complete at SLSA Build L3, meaning that @@ -177,7 +176,7 @@ Consumers SHOULD have an expectation of what "good" looks like; the more information that they need to check, the harder that task becomes. systemParameters -map (string→ParameterValue) +map (string→ParameterValue) The parameters that are under the control of the `builder`. The primary intention of this field is for debugging, incident response, and vulnerability @@ -186,7 +185,7 @@ no need to [verify][Verification] these parameters because the build system is already trusted, and in many cases it is not practical to do so. resolvedDependencies -array (ArtifactReference) +array (ArtifactReference) Collection of artifacts needed at build time, aside from those listed in `externalParameters` or `systemParameters`. For example, if the build script @@ -289,36 +288,45 @@ REQUIRED: at least one of `uri` or `digest` URI describing where this artifact came from. When possible, this SHOULD be a universal and stable identifier, such as a source location or Package -URL ([purl]). Example: `pkg:pypi/pyyaml@6.0` +URL ([purl]). digest DigestSet -One or more cryptographic digests of the contents of this artifact. Example: -`{"sha256": "5f0689d54944564971f2811f9788218bfafb21aa20f532e6490004377dfa648f"}` +One or more cryptographic digests of the contents of this artifact. **TODO:** Decide on hex vs base64 in #533 then document it here. localName string -The name for this artifact local to the build. Example: `PyYAML-6.0.tar.gz` +The name for this artifact local to the build. downloadLocation string (URI) URI identifying the location that this artifact was downloaded from, if -different and not derivable from `uri`. Example: -`https://files.pythonhosted.org/packages/36/2b/61d51a2c4f25ef062ae3f74576b01638bebad5e045f747ff12643df63844/PyYAML-6.0.tar.gz` +different and not derivable from `uri`. -MediaType +mediaType string (MediaType) -Media type (aka MIME type) of this artifact was interpreted. 
Example: -`application/json` +Media type (aka MIME type) that this artifact was interpreted as. +Example: + +```json +{ + "uri": "pkg:pypi/pyyaml@6.0", + "digest": {"sha256": "5f0689d54944564971f2811f9788218bfafb21aa20f532e6490004377dfa648f"}, + "localName": "PyYAML-6.0.tar.gz", + "downloadLocation": "https://files.pythonhosted.org/packages/36/2b/61d51a2c4f25ef062ae3f74576b01638bebad5e045f747ff12643df63844/PyYAML-6.0.tar.gz", + "mediaType": "application/gzip" +} +``` + > ⚠ **RFC:** Do we need all these fields? Is this adding too much complexity? ### RunDetails @@ -332,18 +340,18 @@ attestation envelope) FieldTypeDescription builder -Builder +Builder Identifies the entity that executed the invocation, which is trusted to have correctly performed the operation and populated this provenance. metadata -BuildMetadata +BuildMetadata Metadata about this particular execution of the build. byproducts -array (ArtifactReference) +array (ArtifactReference) Additional artifacts generated during the build that should not be considered the "output" of the build but that may be needed during debugging or incident @@ -388,7 +396,7 @@ stability, how [verification] works, etc. Version numbers of components of the builder. builderDependencies -array (ArtifactReference) +array (ArtifactReference) Dependencies used by the orchestrator that are not run within the workload and that should not affect the build, but may affect the provenance generation or @@ -399,8 +407,7 @@ security guarantees. The builder represents the transitive closure of all the entities that are, by -necessity, [trusted](../spec/v1.0/principles.md#trust-systems-verify-artifacts) -to faithfully run the build and record the provenance. +necessity, [trusted] to faithfully run the build and record the provenance. The `id` MUST reflect the trust base that consumers care about. How detailed to be is a judgement call. 
For example, GitHub Actions supports both GitHub-hosted @@ -480,8 +487,8 @@ validate that the design is general enough to apply to other builders. ## Migrating from 0.2 -To migrate from [version 0.2][0.2] (`old`), use the following pseudocode. The -meaning of each field is unchanged unless otherwise noted. +To migrate from [version 0.2](../v0.2.md) (`old`), use the following pseudocode. +The meaning of each field is unchanged unless otherwise noted. ```javascript { @@ -598,12 +605,8 @@ Renamed to "slsa.dev/provenance". Initial version, named "in-toto.io/Provenance" -[0.1]: v0.1.md -[0.2]: v0.2.md -[DigestSet]: https://github.com/in-toto/attestation/blob/main/spec/field_types.md#DigestSet -[ResourceURI]: https://github.com/in-toto/attestation/blob/main/spec/field_types.md#ResourceURI [Statement]: https://github.com/in-toto/attestation/blob/main/spec/README.md#statement -[Timestamp]: https://github.com/in-toto/attestation/blob/main/spec/field_types.md#Timestamp -[TypeURI]: https://github.com/in-toto/attestation/blob/main/spec/field_types.md#TypeURI [in-toto attestation]: https://github.com/in-toto/attestation [parsing rules]: https://github.com/in-toto/attestation/blob/main/spec/README.md#parsing-rules +[purl]: https://github.com/package-url/purl-spec +[trusted]: /spec/v1.0/principles#trust-systems-verify-artifacts diff --git a/docs/provenance/v1/provenance.cue b/docs/provenance/v1/provenance.cue index ec46c9ca7..20c406e4e 100644 --- a/docs/provenance/v1/provenance.cue +++ b/docs/provenance/v1/provenance.cue @@ -4,7 +4,7 @@ "subject": [...], // Predicate: - "predicateType": "https://slsa.dev/provenance/v1.0?draft", + "predicateType": "https://slsa.dev/provenance/v1?draft", "predicate": { "buildDefinition": { "buildType": string, From 42100743ec0b08cdf99dcb0a243b3bfe9e95e264 Mon Sep 17 00:00:00 2001 From: Mark Lodato Date: Fri, 20 Jan 2023 13:02:37 -0500 Subject: [PATCH 44/44] drop .md from link Signed-off-by: Mark Lodato --- docs/provenance/v1/index.md | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/provenance/v1/index.md b/docs/provenance/v1/index.md index 14f8a2a38..8c1dd3799 100644 --- a/docs/provenance/v1/index.md +++ b/docs/provenance/v1/index.md @@ -487,7 +487,7 @@ validate that the design is general enough to apply to other builders. ## Migrating from 0.2 -To migrate from [version 0.2](../v0.2.md) (`old`), use the following pseudocode. +To migrate from [version 0.2](../v0.2) (`old`), use the following pseudocode. The meaning of each field is unchanged unless otherwise noted. ```javascript