diff --git a/README.md b/README.md
index c6d71e2..cf9cfa0 100644
--- a/README.md
+++ b/README.md
@@ -58,6 +58,7 @@ ssri.checkData(fs.readFileSync('./my-file'), parsed) // => true
* Optional use of reserved `option-expression` syntax.
* Multiple entries for the same algorithm.
* Object-based integrity string manipulation.
+* Optional strict parsing that follows the spec as closely as possible.
### Contributing
@@ -69,10 +70,12 @@ jump in if you'd like to, or even ask us questions if something isn't clear.
### API
-#### `> ssri.parse(integrityString) -> Integrity`
+#### `> ssri.parse(sri, [opts]) -> Integrity`
-Parses an `integrity` string into an `Integrity` data structure. The resulting
-object has this shape:
+Parses `sri` into an `Integrity` data structure. `sri` can be an integrity
+string, an `IntegrityMetadata`-like object with `digest` and `algorithm` fields
+and an optional `options` field, or an `Integrity`-like object. The resulting
+object will be an `Integrity` instance that has this shape:
```javascript
{
@@ -84,13 +87,21 @@ object has this shape:
}
```
+If `opts.strict` is truthy, the resulting object will be filtered such that
+it strictly follows the Subresource Integrity spec, throwing away any entries
+with any invalid components. This also means a restricted set of algorithms
+will be used -- the spec limits them to `sha256`, `sha384`, and `sha512`.
+
+Strict mode is recommended if the integrity strings are intended for use in
+browsers, or in other situations where strict adherence to the spec is needed.
+
##### Example
```javascript
ssri.parse('sha512-9KhgCRIx/AmzC8xqYJTZRrnO8OW2Pxyl2DIMZSBOr0oDvtEFyht3xpp71j/r/pAe1DM+JI/A+line3jUBgzQ7A==?foo') // -> Integrity
```
-#### `> Integrity#concat(otherIntegrity) -> Integrity`
+#### `> Integrity#concat(otherIntegrity, [opts]) -> Integrity`
Concatenates an `Integrity` object with another IntegrityLike, or a string
representing integrity metadata.
@@ -98,6 +109,9 @@ representing integrity metadata.
This is functionally equivalent to concatenating the string format of both
integrity arguments, and calling [`ssri.parse`](#ssri-parse) on the new string.
+If `opts.strict` is true, the new `Integrity` will be created using strict
+parsing rules. See [`ssri.parse`](#parse).
+
##### Example
```javascript
@@ -113,14 +127,17 @@ const mobileIntegrity = ssri.fromData(fs.readFileSync('./index.mobile.js'))
desktopIntegrity.concat(mobileIntegrity)
```
-#### `> Integrity#toString([sep=' ']) -> String`
+#### `> Integrity#toString([opts]) -> String`
Returns the string representation of an `Integrity` object. All metadata entries
-will be concatenated in the string by `sep`.
+will be concatenated in the string by `opts.sep`, which defaults to `' '`.
If you want to serialize an object that didn't from from an `ssri` function,
use [`ssri.serialize()`](#serialize).
+If `opts.strict` is true, the integrity string will be created using strict
+parsing rules. See [`ssri.parse`](#parse).
+
##### Example
```javascript
@@ -129,25 +146,19 @@ const integrity = 'sha512-9KhgCRIx/AmzC8xqYJTZRrnO8OW2Pxyl2DIMZSBOr0oDvtEFyht3xp
ssri.parse(integrity).toString() === integrity
```
-#### `> ssri.serialize(integrityObj, [sep=' ']) -> String`
+#### `> ssri.serialize(sri, [opts]) -> String`
This function is identical to [`Integrity#toString()`](#integrity-to-string),
-except it can be used on _any_ object resembling the shape of either an
-`Integrity` or an `IntegrityMedatada` object.
-
-If `IntegrityLike` has both `.algorithm` and `.digest` properties, it will be
-serialized as a single integrity entry. That is, `-`, along
-with `?` if the object has an `options` property.
+except it can be used on _any_ value that [`parse`](#parse) can handle -- that
+is, a string, an `IntegrityMetadata`-like object, or an `Integrity`-like object.
-Otherwise, the `IntegrityLike` will be treated as a full `Integrity` object,
-where every key on the object will be interpreted as an algorithm, and each
-value should be an array of metadata objects (with `algorithm` and `digest`
-properties) corresponding to that key.
-
-The `sep` option defines the string to use when joining multiple entries
+The `opts.sep` option defines the string to use when joining multiple entries
together. To be spec-compliant, this _must_ be whitespace. The default is a
single space (`' '`).
+If `opts.strict` is true, the integrity string will be created using strict
+parsing rules. See [`ssri.parse`](#parse).
+
##### Example
```javascript
@@ -190,6 +201,9 @@ strings that will be added to all generated integrity metadata generated by
specified semantics besides being `?`-separated. Use at your own risk, and
probably avoid if your integrity strings are meant to be used with browsers.
+If `opts.strict` is true, the integrity object will be created using strict
+parsing rules. See [`ssri.parse`](#parse).
+
##### Example
```javascript
@@ -214,6 +228,9 @@ part of [`ssri.fromData`](#from-data).
Additionally, `opts.Promise` may be passed in to inject a Promise library of
choice. By default, ssri will use Node's built-in Promises.
+If `opts.strict` is true, the integrity object will be created using strict
+parsing rules. See [`ssri.parse`](#parse).
+
##### Example
```javascript
@@ -227,8 +244,8 @@ ssri.fromStream(fs.createReadStream('index.js'), {
#### `> ssri.checkData(data, sri, [opts]) -> Algorithm|false`
Verifies `data` integrity against an `sri` argument. `data` may be either a
-`String` or a `Buffer`, and `sri` can be any `Integrity`-like, or a `String`
-that [`ssri.parse`](#parse) can turn into one.
+`String` or a `Buffer`, and `sri` can be any subresource integrity
+representation that [`ssri.parse`](#parse) can handle.
If verification succeeds, `checkData` will return the name of the algorithm that
was used for verification (a truthy value). Otherwise, it will return `false`.
@@ -252,8 +269,8 @@ ssri.checkData(data, 'sha1-BaDDigEST') // -> false
#### `> ssri.checkStream(stream, sri, [opts]) -> Promise`
Verifies the contents of `stream` against an `sri` argument. `stream` will be
-consumed in its entirety by this process. `sri` can be any `Integrity`-like, or
-a `String` that [`ssri.parse`](#parse) can turn into one.
+consumed in its entirety by this process. `sri` can be any subresource integrity
+representation that [`ssri.parse`](#parse) can handle.
`checkStream` will return a Promise that either resolves to the string name of
the algorithm that verification was done with, or, if the verification fails or
@@ -293,8 +310,8 @@ ssri.checkStream(
#### `> createCheckerStream(sri, [opts]) -> CheckerStream`
Returns a `Through` stream that data can be piped through in order to check it
-against `sri`. `sri` can be any `Integrity`-like, or a `String` that
-[`ssri.parse`](#parse) can turn into one.
+against `sri`. `sri` can be any subresource integrity representation that
+[`ssri.parse`](#parse) can handle.
If verification fails, the returned stream will error with an `EBADCHECKSUM`
error code.
diff --git a/index.js b/index.js
index ad10bdc..d8acfba 100644
--- a/index.js
+++ b/index.js
@@ -3,52 +3,101 @@
const crypto = require('crypto')
const Transform = require('stream').Transform
-const SRI_REGEX = /([^-]+)-([^?]+)([?\S*]*)/
+const SPEC_ALGORITHMS = ['sha256', 'sha384', 'sha512']
+
+const BASE64_REGEX = /[a-z0-9+/]+(?:=?=?)/i
+const SRI_REGEX = /^([^-]+)-([^?]+)([?\S*]*)$/
+const STRICT_SRI_REGEX = /^([^-]+)-([A-Za-z0-9+/]+(?:=?=?))([?\x21-\x7E]*)$/
+const VCHAR_REGEX = /[\x21-\x7E]+/
class IntegrityMetadata {
- constructor (metadata) {
- this.source = metadata
+ constructor (metadata, opts) {
+ const strict = !!(opts && opts.strict)
+ this.source = metadata.trim()
// 3.1. Integrity metadata
// https://w3c.github.io/webappsec-subresource-integrity/#integrity-metadata-description
- const match = metadata.match(SRI_REGEX)
+ const match = this.source.match(
+ strict
+ ? STRICT_SRI_REGEX
+ : SRI_REGEX
+ )
if (!match) { return }
+ if (strict && !SPEC_ALGORITHMS.some(a => a === match[1])) { return }
this.algorithm = match[1]
this.digest = match[2]
const rawOpts = match[3]
this.options = rawOpts ? rawOpts.slice(1).split('?') : []
}
- toString () {
- const opts = this.options && this.options.length
+ toString (opts) {
+ if (opts && opts.strict) {
+ // Strict mode enforces the standard as closely to the letter as it
+ // can.
+ if (!(
+ // The spec has very restricted productions for algorithms.
+ // https://www.w3.org/TR/CSP2/#source-list-syntax
+ SPEC_ALGORITHMS.some(x => x === this.algorithm) &&
+ // Usually, if someone insists on using a "different" base64, we
+ // leave it as-is, since there are multiple standards, and the one
+ // the spec requires is not a URL-safe variant.
+ // https://www.w3.org/TR/CSP2/#base64_value
+ this.digest.match(BASE64_REGEX) &&
+ // Option syntax is strictly visual chars.
+ // https://w3c.github.io/webappsec-subresource-integrity/#grammardef-option-expression
+ // https://tools.ietf.org/html/rfc5234#appendix-B.1
+ (this.options || []).every(opt => opt.match(VCHAR_REGEX))
+ )) {
+ return ''
+ }
+ }
+ const options = this.options && this.options.length
? `?${this.options.join('?')}`
: ''
- return `${this.algorithm}-${this.digest}${opts}`
+ return `${this.algorithm}-${this.digest}${options}`
}
}
class Integrity {
- toString (sep) {
- sep = sep || ' '
+ toString (opts) {
+ opts = opts || {}
+ let sep = opts.sep || ' '
+ if (opts.strict) {
+ // Entries must be separated by whitespace, according to spec.
+ sep = sep.replace(/\S+/g, ' ')
+ }
return Object.keys(this).map(k => {
return this[k].map(meta => {
- return IntegrityMetadata.prototype.toString.call(meta)
- })
- }).join(sep)
+ return IntegrityMetadata.prototype.toString.call(meta, opts)
+ }).filter(x => x.length).join(sep)
+ }).filter(x => x.length).join(sep)
}
- concat (integrity) {
+ concat (integrity, opts) {
const other = typeof integrity === 'string'
? integrity
: serialize(integrity)
- return parse(`${this.toString()} ${other}`)
+ return parse(`${this.toString()} ${other}`, opts)
}
}
module.exports.parse = parse
-function parse (integrity) {
+function parse (sri, opts) {
+ opts = opts || {}
+ if (typeof sri === 'string') {
+ return _parse(sri, opts)
+ } else if (sri.algorithm && sri.digest) {
+ const fullSri = new Integrity()
+ fullSri[sri.algorithm] = [sri]
+ return _parse(serialize(fullSri, opts), opts)
+ } else {
+ return _parse(serialize(sri, opts), opts)
+ }
+}
+
+function _parse (integrity, opts) {
// 3.4.3. Parse metadata
// https://w3c.github.io/webappsec-subresource-integrity/#parse-metadata
return integrity.trim().split(/\s+/).reduce((acc, string) => {
- const metadata = new IntegrityMetadata(string)
+ const metadata = new IntegrityMetadata(string, opts)
if (metadata.algorithm && metadata.digest) {
const algo = metadata.algorithm
if (!acc[algo]) { acc[algo] = [] }
@@ -60,11 +109,11 @@ function parse (integrity) {
module.exports.serialize = serialize
module.exports.unparse = serialize
-function serialize (obj, sep) {
+function serialize (obj, opts) {
if (obj.algorithm && obj.digest) {
- return IntegrityMetadata.prototype.toString.call(obj)
+ return IntegrityMetadata.prototype.toString.call(obj, opts)
} else {
- return Integrity.prototype.toString.call(obj, sep)
+ return Integrity.prototype.toString.call(obj, opts)
}
}
@@ -77,7 +126,10 @@ function fromData (data, opts) {
: ''
return algorithms.reduce((acc, algo) => {
const digest = crypto.createHash(algo).update(data).digest('base64')
- const meta = new IntegrityMetadata(`${algo}-${digest}${optString}`)
+ const meta = new IntegrityMetadata(
+ `${algo}-${digest}${optString}`,
+ opts
+ )
if (meta.algorithm && meta.digest) {
const algo = meta.algorithm
if (!acc[algo]) { acc[algo] = [] }
@@ -103,7 +155,10 @@ function fromStream (stream, opts) {
resolve(algorithms.reduce((acc, algo, i) => {
const hash = hashes[i]
const digest = hash.digest('base64')
- const meta = new IntegrityMetadata(`${algo}-${digest}${optString}`)
+ const meta = new IntegrityMetadata(
+ `${algo}-${digest}${optString}`,
+ opts
+ )
if (meta.algorithm && meta.digest) {
const algo = meta.algorithm
if (!acc[algo]) { acc[algo] = [] }
@@ -118,13 +173,7 @@ function fromStream (stream, opts) {
module.exports.checkData = checkData
function checkData (data, sri, opts) {
opts = opts || {}
- if (typeof sri === 'string') {
- sri = parse(sri)
- } else if (sri.algorithm && sri.digest) {
- const fullSri = new Integrity()
- fullSri[sri.algorithm] = [sri]
- sri = fullSri
- }
+ sri = parse(sri, opts)
const pickAlgorithm = opts.pickAlgorithm || getPrioritizedHash
const algorithm = Object.keys(sri).reduce((acc, algo) => {
return pickAlgorithm(acc, algo) || acc
@@ -152,13 +201,7 @@ function checkStream (stream, sri, opts) {
module.exports.createCheckerStream = createCheckerStream
function createCheckerStream (sri, opts) {
opts = opts || {}
- if (typeof sri === 'string') {
- sri = parse(sri)
- } else if (sri.algorithm && sri.digest) {
- const fullSri = new Integrity()
- fullSri[sri.algorithm] = [sri]
- sri = fullSri
- }
+ sri = parse(sri, opts)
const pickAlgorithm = opts.pickAlgorithm || getPrioritizedHash
const algorithm = Object.keys(sri).reduce((acc, algo) => {
return pickAlgorithm(acc, algo) || acc