From 5c48529392de546027997b405a5014a72d18dcd3 Mon Sep 17 00:00:00 2001
From: Alex Kocharin <alex@kocharin.ru>
Date: Thu, 27 May 2021 17:54:29 +0300
Subject: [PATCH] Skip byte order mark when parsing SVG

---
 CHANGELOG.md            |  5 +++++
 lib/parse_stream/svg.js | 19 +++++++++++++++++++
 lib/parse_sync/svg.js   |  3 +++
 test/test_formats.js    | 24 ++++++++++++++++++++++++
 4 files changed, 51 insertions(+)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6f00042..69eb9b5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 
+## [7.1.1] - WIP
+### Fixed
+- Allow byte order mark at the start of SVG, #57.
+
+
 ## [7.1.0] - 2021-04-15
 ### Fixed
 - Fix options merge, when property value is class (switched from `deepmerge` to
diff --git a/lib/parse_stream/svg.js b/lib/parse_stream/svg.js
index 1f092cb..ccb688c 100644
--- a/lib/parse_stream/svg.js
+++ b/lib/parse_stream/svg.js
@@ -125,14 +125,33 @@ module.exports = function () {
   var state    = STATE_IDENTIFY;
   var data_len = 0;
   var str      = '';
+  var buf      = null; // holds a too-short (< 4 bytes) first chunk in IDENTIFY until more data arrives
 
   var parser = new Transform({
     readableObjectMode: true,
     transform: function transform(chunk, encoding, next) {
       switch (state) {
+        // identify step is needed to fail fast if the file isn't SVG
         case STATE_IDENTIFY:
+          if (buf) {
+            // an earlier chunk was shorter than 4 bytes and was held back in `buf`;
+            // prepend it so the BOM check below sees the real start of the data
+            chunk = Buffer.concat([ buf, chunk ]);
+            buf = null;
+          }
+
+          if (data_len === 0 && chunk.length < 4) {
+            // nothing consumed yet and fewer than 4 bytes available: hold this chunk
+            // back until more data arrives, so the BOM check below has enough bytes
+            buf = chunk;
+            break;
+          }
+
           var i = 0, max = chunk.length;
 
+          // byte order mark, https://github.com/nodeca/probe-image-size/issues/57
+          if (data_len === 0 && chunk[0] === 0xEF && chunk[1] === 0xBB && chunk[2] === 0xBF) i = 3;
+
           while (i < max && isWhiteSpace(chunk[i])) i++;
 
           if (i >= max) {
diff --git a/lib/parse_sync/svg.js b/lib/parse_sync/svg.js
index 22f1d60..a4aee17 100644
--- a/lib/parse_sync/svg.js
+++ b/lib/parse_sync/svg.js
@@ -14,6 +14,9 @@ function isFinitePositive(val) {
 function canBeSvg(buf) {
   var i = 0, max = buf.length;
 
+  // byte order mark, https://github.com/nodeca/probe-image-size/issues/57
+  if (buf[0] === 0xEF && buf[1] === 0xBB && buf[2] === 0xBF) i = 3;
+
   while (i < max && isWhiteSpace(buf[i])) i++;
 
   if (i === max) return false;
diff --git a/test/test_formats.js b/test/test_formats.js
index 5fb0ad9..f2dfdef 100644
--- a/test/test_formats.js
+++ b/test/test_formats.js
@@ -1256,6 +1256,24 @@ describe('File formats', function () {
       assert.deepStrictEqual(size, { width: 5, height: 4, type: 'svg', mime: 'image/svg+xml', wUnits: 'px', hUnits: 'px' });
     });
 
+    it('should skip BOM', async function () {
+      let size = await probe(Readable.from([ Buffer.from('\ufeff  <svg width="5" height="4"></svg>') ]));
+
+      assert.deepStrictEqual(size, { width: 5, height: 4, type: 'svg', mime: 'image/svg+xml', wUnits: 'px', hUnits: 'px' });
+    });
+
+    it('should skip BOM in different chunks', async function () {
+      let size = await probe(Readable.from([
+        Buffer.from([ 0xEF ]),
+        Buffer.from([ 0xBB, 0xBF ]),
+        Buffer.from(' <s'),
+        Buffer.from('vg width="'),
+        Buffer.from('5" height="5"></svg>')
+      ]));
+
+      assert.deepStrictEqual(size, { width: 5, height: 5, type: 'svg', mime: 'image/svg+xml', wUnits: 'px', hUnits: 'px' });
+    });
+
     /* eslint-disable max-nested-callbacks */
     describe('coverage', function () {
       it('too much data before doctype', async function () {
@@ -1367,6 +1385,12 @@ describe('File formats', function () {
       assert.deepStrictEqual(size, { width: 5, height: 4, type: 'svg', mime: 'image/svg+xml', wUnits: 'px', hUnits: 'px' });
     });
 
+    it('should skip BOM', async function () {
+      let size = probe.sync(Buffer.from('\ufeff  <svg width="5" height="4"></svg>'));
+
+      assert.deepStrictEqual(size, { width: 5, height: 4, type: 'svg', mime: 'image/svg+xml', wUnits: 'px', hUnits: 'px' });
+    });
+
     describe('coverage', function () {
       it('wrong signature', function () {
         let size = probe.sync(Buffer.from('  <not-really-svg width="1" height="1">'));