Permalink
Browse files

fix: serving binary files

For binary files, we can't cache the content as a string (utf8), we need to use a binary buffer.

All the preprocessors are currently implemented to deal with strings, as that makes sense for text files. If we changed preprocessors to deal with buffers then each preprocessor would have to convert the buffer to a string, do the string manipulation and then convert it back to a buffer. I don't think that's worthy as there are no binary preprocessors.

This change disables preprocessing of binary files to avoid weird errors.
It shows warning. If there is a reasonable use case for preprocessing binary files we can figure out something.

Closes #864
Closes #885
  • Loading branch information...
1 parent aca84dc commit 8a30cf55751bbaec672597f4f0ed66fe8742095f @vojtajina vojtajina committed Feb 5, 2014
Showing with 45 additions and 8 deletions.
  1. +26 −8 lib/preprocessor.js
  2. +19 −0 test/unit/preprocessor.spec.coffee
View
@@ -1,3 +1,4 @@
+var path = require('path');
var fs = require('graceful-fs');
var crypto = require('crypto');
var mm = require('minimatch');
@@ -10,12 +11,29 @@ var sha1 = function(data) {
return hash.digest('hex');
};
+var isBinary = Object.create(null);
@oncletom

oncletom Mar 12, 2014

Contributor

So in our case, just adding dat in that list would suffice. I'll create a PR for that.

@vojtajina

vojtajina Mar 14, 2014

Contributor

yep, saw your PR, makes sense

+[
+ 'adp', 'au', 'mid', 'mp4a', 'mpga', 'oga', 's3m', 'sil', 'eol', 'dra', 'dts', 'dtshd', 'lvp',
+ 'pya', 'ecelp4800', 'ecelp7470', 'ecelp9600', 'rip', 'weba', 'aac', 'aif', 'caf', 'flac', 'mka',
+ 'm3u', 'wax', 'wma', 'wav', 'xm', 'flac', '3gp', '3g2', 'h261', 'h263', 'h264', 'jpgv', 'jpm',
+ 'mj2', 'mp4', 'mpeg', 'ogv', 'qt', 'uvh', 'uvm', 'uvp', 'uvs', 'dvb', 'fvt', 'mxu', 'pyv', 'uvu',
+ 'viv', 'webm', 'f4v', 'fli', 'flv', 'm4v', 'mkv', 'mng', 'asf', 'vob', 'wm', 'wmv', 'wmx', 'wvx',
+ 'movie', 'smv', 'ts', 'bmp', 'cgm', 'g3', 'gif', 'ief', 'jpg', 'jpeg', 'ktx', 'png', 'btif',
+ 'sgi', 'svg', 'tiff', 'psd', 'uvi', 'sub', 'djvu', 'dwg', 'dxf', 'fbs', 'fpx', 'fst', 'mmr',
+ 'rlc', 'mdi', 'wdp', 'npx', 'wbmp', 'xif', 'webp', '3ds', 'ras', 'cmx', 'fh', 'ico', 'pcx', 'pic',
+ 'pnm', 'pbm', 'pgm', 'ppm', 'rgb', 'tga', 'xbm', 'xpm', 'xwd', 'zip', 'rar', 'tar', 'bz2', 'eot',
+ 'ttf', 'woff'
+].forEach(function(extension) {
+ isBinary['.' + extension] = true;
@vojtajina

vojtajina Mar 14, 2014

Contributor

Sure, we can use it.

These extensions are just copy/pasted from chokidar.

+});
+
// TODO(vojta): instantiate preprocessors at the start to show warnings immediately
var createPreprocessor = function(config, basePath, injector) {
var patterns = Object.keys(config);
var alreadyDisplayedWarnings = Object.create(null);
return function(file, done) {
+ var thisFileIsBinary = isBinary[path.extname(file.originalPath)];
var preprocessors = [];
var nextPreprocessor = function(error, content) {
// normalize B-C
@@ -61,18 +79,18 @@ var createPreprocessor = function(config, basePath, injector) {
// TODO(vojta): should we cache this ?
for (var i = 0; i < patterns.length; i++) {
if (mm(file.originalPath, patterns[i])) {
- config[patterns[i]].forEach(instantiatePreprocessor);
+ if (thisFileIsBinary) {
+ log.warn('Ignoring preprocessing (%s) %s because it is a binary file.',
+ config[patterns[i]].join(', '), file.originalPath);
+ } else {
+ config[patterns[i]].forEach(instantiatePreprocessor);
+ }
}
}
- // compute SHA of the content
- preprocessors.push(function(content, file, done) {
- file.sha = sha1(content);
- done(content);
- });
-
return fs.readFile(file.originalPath, function(err, buffer) {
- nextPreprocessor(buffer.toString());
+ file.sha = sha1(buffer);
+ nextPreprocessor(null, thisFileIsBinary ? buffer : buffer.toString());
});
};
};
@@ -12,6 +12,7 @@ describe 'preprocessor', ->
some:
'a.js': mocks.fs.file 0, 'content'
'a.txt': mocks.fs.file 0, 'some-text'
+ 'photo.png': mocks.fs.file 0, 'binary'
mocks_ =
'graceful-fs': mockFs
@@ -133,3 +134,21 @@ describe 'preprocessor', ->
pp file, (err) ->
expect(fakePreprocessor).not.to.have.been.called
done()
+
+
+ it 'should not preprocess binary files', (done) ->
+ fakePreprocessor = sinon.spy (content, file, done) ->
+ done null, content
+
+ injector = new di.Injector [{
+ 'preprocessor:fake': ['factory', -> fakePreprocessor]
+ }]
+
+ pp = m.createPreprocessor {'**/*': ['fake']}, null, injector
+
+ file = {originalPath: '/some/photo.png', path: 'path'}
+
+ pp file, (err) ->
+ expect(fakePreprocessor).not.to.have.been.called
+ expect(file.content).to.be.an.instanceof Buffer
+ done()

1 comment on commit 8a30cf5

Contributor

oncletom commented on 8a30cf5 Mar 12, 2014

I have a use case of processing binary files :-p
Temporarily worked around in bbc/peaks.js@e54759e
We basically serve binary data to describe audio waveforms, because they are smaller in size and memory footprint.

Hopefully we have a JSON fallback but now we cannot tests the binary cases.

Please sign in to comment.