diff --git a/README.md b/README.md index c6d71e2..cf9cfa0 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,7 @@ ssri.checkData(fs.readFileSync('./my-file'), parsed) // => true * Optional use of reserved `option-expression` syntax. * Multiple entries for the same algorithm. * Object-based integrity string manipulation. +* Optional strict parsing that follows the spec as closely as possible. ### Contributing @@ -69,10 +70,12 @@ jump in if you'd like to, or even ask us questions if something isn't clear. ### API -#### `> ssri.parse(integrityString) -> Integrity` +#### `> ssri.parse(sri, [opts]) -> Integrity` -Parses an `integrity` string into an `Integrity` data structure. The resulting -object has this shape: +Parses `sri` into an `Integrity` data structure. `sri` can be an integrity +string, an `IntegrityMetadata`-like with `digest` and `algorithm` fields and an +optional `options` field, or an `Integrity`-like object. The resulting object +will be an `Integrity` instance that has this shape: ```javascript { @@ -84,13 +87,21 @@ object has this shape: } ``` +If `opts.strict` is truthy, the resulting object will be filtered such that +it strictly follows the Subresource Integrity spec, throwing away any entries +with any invalid components. This also means a restricted set of algorithms +will be used -- the spec limits them to `sha256`, `sha384`, and `sha512`. + +Strict mode is recommended if the integrity strings are intended for use in +browsers, or in other situations where strict adherence to the spec is needed. + ##### Example ```javascript ssri.parse('sha512-9KhgCRIx/AmzC8xqYJTZRrnO8OW2Pxyl2DIMZSBOr0oDvtEFyht3xpp71j/r/pAe1DM+JI/A+line3jUBgzQ7A==?foo') // -> Integrity ``` -#### `> Integrity#concat(otherIntegrity) -> Integrity` +#### `> Integrity#concat(otherIntegrity, [opts]) -> Integrity` Concatenates an `Integrity` object with another IntegrityLike, or a string representing integrity metadata. @@ -98,6 +109,9 @@ representing integrity metadata. 
This is functionally equivalent to concatenating the string format of both integrity arguments, and calling [`ssri.parse`](#ssri-parse) on the new string. +If `opts.strict` is true, the new `Integrity` will be created using strict +parsing rules. See [`ssri.parse`](#parse). + ##### Example ```javascript @@ -113,14 +127,17 @@ const mobileIntegrity = ssri.fromData(fs.readFileSync('./index.mobile.js')) desktopIntegrity.concat(mobileIntegrity) ``` -#### `> Integrity#toString([sep=' ']) -> String` +#### `> Integrity#toString([opts]) -> String` Returns the string representation of an `Integrity` object. All metadata entries -will be concatenated in the string by `sep`. +will be concatenated in the string by `opts.sep`, which defaults to `' '`. If you want to serialize an object that didn't from from an `ssri` function, use [`ssri.serialize()`](#serialize). +If `opts.strict` is true, the integrity string will be created using strict +parsing rules. See [`ssri.parse`](#parse). + ##### Example ```javascript @@ -129,25 +146,19 @@ const integrity = 'sha512-9KhgCRIx/AmzC8xqYJTZRrnO8OW2Pxyl2DIMZSBOr0oDvtEFyht3xp ssri.parse(integrity).toString() === integrity ``` -#### `> ssri.serialize(integrityObj, [sep=' ']) -> String` +#### `> ssri.serialize(sri, [opts]) -> String` This function is identical to [`Integrity#toString()`](#integrity-to-string), -except it can be used on _any_ object resembling the shape of either an -`Integrity` or an `IntegrityMedatada` object. - -If `IntegrityLike` has both `.algorithm` and `.digest` properties, it will be -serialized as a single integrity entry. That is, `-`, along -with `?` if the object has an `options` property. +except it can be used on _any_ object that [`parse`](#parse) can handle -- that +is, a string, an `IntegrityMetadata`-like, or an `Integrity`-like. 
-Otherwise, the `IntegrityLike` will be treated as a full `Integrity` object, -where every key on the object will be interpreted as an algorithm, and each -value should be an array of metadata objects (with `algorithm` and `digest` -properties) corresponding to that key. - -The `sep` option defines the string to use when joining multiple entries +The `opts.sep` option defines the string to use when joining multiple entries together. To be spec-compliant, this _must_ be whitespace. The default is a single space (`' '`). +If `opts.strict` is true, the integrity string will be created using strict +parsing rules. See [`ssri.parse`](#parse). + ##### Example ```javascript @@ -190,6 +201,9 @@ strings that will be added to all generated integrity metadata generated by specified semantics besides being `?`-separated. Use at your own risk, and probably avoid if your integrity strings are meant to be used with browsers. +If `opts.strict` is true, the integrity object will be created using strict +parsing rules. See [`ssri.parse`](#parse). + ##### Example ```javascript @@ -214,6 +228,9 @@ part of [`ssri.fromData`](#from-data). Additionally, `opts.Promise` may be passed in to inject a Promise library of choice. By default, ssri will use Node's built-in Promises. +If `opts.strict` is true, the integrity object will be created using strict +parsing rules. See [`ssri.parse`](#parse). + ##### Example ```javascript @@ -227,8 +244,8 @@ ssri.fromStream(fs.createReadStream('index.js'), { #### `> ssri.checkData(data, sri, [opts]) -> Algorithm|false` Verifies `data` integrity against an `sri` argument. `data` may be either a -`String` or a `Buffer`, and `sri` can be any `Integrity`-like, or a `String` -that [`ssri.parse`](#parse) can turn into one. +`String` or a `Buffer`, and `sri` can be any subresource integrity +representation that [`ssri.parse`](#parse) can handle. 
If verification succeeds, `checkData` will return the name of the algorithm that was used for verification (a truthy value). Otherwise, it will return `false`. @@ -252,8 +269,8 @@ ssri.checkData(data, 'sha1-BaDDigEST') // -> false #### `> ssri.checkStream(stream, sri, [opts]) -> Promise` Verifies the contents of `stream` against an `sri` argument. `stream` will be -consumed in its entirety by this process. `sri` can be any `Integrity`-like, or -a `String` that [`ssri.parse`](#parse) can turn into one. +consumed in its entirety by this process. `sri` can be any subresource integrity +representation that [`ssri.parse`](#parse) can handle. `checkStream` will return a Promise that either resolves to the string name of the algorithm that verification was done with, or, if the verification fails or @@ -293,8 +310,8 @@ ssri.checkStream( #### `> createCheckerStream(sri, [opts]) -> CheckerStream` Returns a `Through` stream that data can be piped through in order to check it -against `sri`. `sri` can be any `Integrity`-like, or a `String` that -[`ssri.parse`](#parse) can turn into one. +against `sri`. `sri` can be any subresource integrity representation that +[`ssri.parse`](#parse) can handle. If verification fails, the returned stream will error with an `EBADCHECKSUM` error code. diff --git a/index.js b/index.js index ad10bdc..d8acfba 100644 --- a/index.js +++ b/index.js @@ -3,52 +3,101 @@ const crypto = require('crypto') const Transform = require('stream').Transform -const SRI_REGEX = /([^-]+)-([^?]+)([?\S*]*)/ +const SPEC_ALGORITHMS = ['sha256', 'sha384', 'sha512'] + +const BASE64_REGEX = /^[a-z0-9+/]+(?:=?=?)$/i // anchored: the whole digest must be base64 +const SRI_REGEX = /^([^-]+)-([^?]+)([?\S*]*)$/ +const STRICT_SRI_REGEX = /^([^-]+)-([A-Za-z0-9+/]+(?:=?=?))(\?[\x21-\x7E]*)?$/ // options must start with '?' +const VCHAR_REGEX = /^[\x21-\x7E]+$/ // anchored: every char of an option must be a VCHAR class IntegrityMetadata { - constructor (metadata) { - this.source = metadata + constructor (metadata, opts) { + const strict = !!(opts && opts.strict) + this.source = metadata.trim() // 3.1. 
Integrity metadata // https://w3c.github.io/webappsec-subresource-integrity/#integrity-metadata-description - const match = metadata.match(SRI_REGEX) + const match = this.source.match( + strict + ? STRICT_SRI_REGEX + : SRI_REGEX + ) if (!match) { return } + if (strict && !SPEC_ALGORITHMS.some(a => a === match[1])) { return } this.algorithm = match[1] this.digest = match[2] const rawOpts = match[3] this.options = rawOpts ? rawOpts.slice(1).split('?') : [] } - toString () { - const opts = this.options && this.options.length + toString (opts) { + if (opts && opts.strict) { + // Strict mode enforces the standard as closely to the letter of the + // spec as it can; a non-conforming entry serializes to ''. + if (!( + // The spec has very restricted productions for algorithms. + // https://www.w3.org/TR/CSP2/#source-list-syntax + SPEC_ALGORITHMS.some(x => x === this.algorithm) && + // Usually, if someone insists on using a "different" base64, we + // leave it as-is, since there are multiple standards, and the + // one the spec specifies is not a URL-safe variant. + // https://www.w3.org/TR/CSP2/#base64_value + this.digest.match(BASE64_REGEX) && + // Option syntax is strictly visible chars (VCHAR). + // https://w3c.github.io/webappsec-subresource-integrity/#grammardef-option-expression + // https://tools.ietf.org/html/rfc5234#appendix-B.1 + (this.options || []).every(opt => opt.match(VCHAR_REGEX)) + )) { + return '' + } + } + const options = this.options && this.options.length ? `?${this.options.join('?')}` : '' - return `${this.algorithm}-${this.digest}${opts}` + return `${this.algorithm}-${this.digest}${options}` } } class Integrity { - toString (sep) { - sep = sep || ' ' + toString (opts) { + opts = opts || {} + let sep = opts.sep || ' ' + if (opts.strict) { + // Entries must be separated by whitespace, according to spec. 
+ sep = sep.replace(/\S+/g, ' ') + } return Object.keys(this).map(k => { return this[k].map(meta => { - return IntegrityMetadata.prototype.toString.call(meta) - }) - }).join(sep) + return IntegrityMetadata.prototype.toString.call(meta, opts) + }).filter(x => x.length).join(sep) + }).filter(x => x.length).join(sep) } - concat (integrity) { + concat (integrity, opts) { const other = typeof integrity === 'string' ? integrity : serialize(integrity) - return parse(`${this.toString()} ${other}`) + return parse(`${this.toString()} ${other}`, opts) } } module.exports.parse = parse -function parse (integrity) { +function parse (sri, opts) { + opts = opts || {} + if (typeof sri === 'string') { + return _parse(sri, opts) + } else if (sri.algorithm && sri.digest) { + const fullSri = new Integrity() + fullSri[sri.algorithm] = [sri] + return _parse(serialize(fullSri, opts), opts) + } else { + return _parse(serialize(sri, opts), opts) + } +} + +function _parse (integrity, opts) { // 3.4.3. Parse metadata // https://w3c.github.io/webappsec-subresource-integrity/#parse-metadata return integrity.trim().split(/\s+/).reduce((acc, string) => { - const metadata = new IntegrityMetadata(string) + const metadata = new IntegrityMetadata(string, opts) if (metadata.algorithm && metadata.digest) { const algo = metadata.algorithm if (!acc[algo]) { acc[algo] = [] } @@ -60,11 +109,11 @@ function parse (integrity) { module.exports.serialize = serialize module.exports.unparse = serialize -function serialize (obj, sep) { +function serialize (obj, opts) { if (obj.algorithm && obj.digest) { - return IntegrityMetadata.prototype.toString.call(obj) + return IntegrityMetadata.prototype.toString.call(obj, opts) } else { - return Integrity.prototype.toString.call(obj, sep) + return Integrity.prototype.toString.call(obj, opts) } } @@ -77,7 +126,10 @@ function fromData (data, opts) { : '' return algorithms.reduce((acc, algo) => { const digest = crypto.createHash(algo).update(data).digest('base64') - const 
meta = new IntegrityMetadata(`${algo}-${digest}${optString}`) + const meta = new IntegrityMetadata( + `${algo}-${digest}${optString}`, + opts + ) if (meta.algorithm && meta.digest) { const algo = meta.algorithm if (!acc[algo]) { acc[algo] = [] } @@ -103,7 +155,10 @@ function fromStream (stream, opts) { resolve(algorithms.reduce((acc, algo, i) => { const hash = hashes[i] const digest = hash.digest('base64') - const meta = new IntegrityMetadata(`${algo}-${digest}${optString}`) + const meta = new IntegrityMetadata( + `${algo}-${digest}${optString}`, + opts + ) if (meta.algorithm && meta.digest) { const algo = meta.algorithm if (!acc[algo]) { acc[algo] = [] } @@ -118,13 +173,7 @@ function fromStream (stream, opts) { module.exports.checkData = checkData function checkData (data, sri, opts) { opts = opts || {} - if (typeof sri === 'string') { - sri = parse(sri) - } else if (sri.algorithm && sri.digest) { - const fullSri = new Integrity() - fullSri[sri.algorithm] = [sri] - sri = fullSri - } + sri = parse(sri, opts) const pickAlgorithm = opts.pickAlgorithm || getPrioritizedHash const algorithm = Object.keys(sri).reduce((acc, algo) => { return pickAlgorithm(acc, algo) || acc @@ -152,13 +201,7 @@ function checkStream (stream, sri, opts) { module.exports.createCheckerStream = createCheckerStream function createCheckerStream (sri, opts) { opts = opts || {} - if (typeof sri === 'string') { - sri = parse(sri) - } else if (sri.algorithm && sri.digest) { - const fullSri = new Integrity() - fullSri[sri.algorithm] = [sri] - sri = fullSri - } + sri = parse(sri, opts) const pickAlgorithm = opts.pickAlgorithm || getPrioritizedHash const algorithm = Object.keys(sri).reduce((acc, algo) => { return pickAlgorithm(acc, algo) || acc