Skip to content

Commit

Permalink
Fix wrong BOM detection in startsWithBom and fix BOM checks containing 0
Browse files Browse the repository at this point in the history
Apply to startsWithBom the same BOM detection fix that was already applied
to removeBom in c9c848e; without it, binary
data is considered text.

Also fixed the checks for "\x00\x00\xFE\xFF" and "\xFF\xFE\x00\x00", which are
the problematic BOMs because they contain the null character.
  • Loading branch information
mgrojo committed Jan 2, 2018
1 parent c9c848e commit feda408
Showing 1 changed file with 16 additions and 9 deletions.
25 changes: 16 additions & 9 deletions src/Data.cpp
@@ -1,6 +1,15 @@
#include "Data.h"

#include <QTextCodec>
#include <QDebug>

This comment has been minimized.

Copy link
@justinclift

justinclift Jan 4, 2018

Member

Left-over debugging bit? 😄

This comment has been minimized.

Copy link
@mgrojo

mgrojo Jan 4, 2018

Author Member

Yep 😄 This one and two others are now deleted: 1be61db


// Byte order marks recognised by the BOM helpers in this file. The list is
// not exhaustive; it only covers the marks that are most likely to be met.
// Each array is built with an explicit length so that marks containing zero
// bytes keep their full size instead of ending at the first '\0'.
static const QByteArray bom3 = QByteArray("\xEF\xBB\xBF", 3);         // UTF-8
static const QByteArray bom2a = QByteArray("\xFE\xFF", 2);            // UTF-16, big endian
static const QByteArray bom2b = QByteArray("\xFF\xFE", 2);            // UTF-16, little endian
static const QByteArray bom4a = QByteArray("\x00\x00\xFE\xFF", 4);    // UTF-32, big endian
static const QByteArray bom4b = QByteArray("\xFF\xFE\x00\x00", 4);    // UTF-32, little endian

bool isTextOnly(QByteArray data, const QString& encoding, bool quickTest)
{
Expand All @@ -22,28 +31,26 @@ bool isTextOnly(QByteArray data, const QString& encoding, bool quickTest)

/**
 * Checks whether the given data begins with one of the byte order marks
 * known to this file (bom3, bom2a, bom2b, bom4a, bom4b).
 *
 * Note that these aren't all possible BOMs. But they are probably the most
 * common ones.
 *
 * @param data Raw bytes to inspect; not modified.
 * @return true if data starts with one of the known BOMs, false otherwise.
 */
bool startsWithBom(const QByteArray& data)
{
    // Only the sized QByteArray constants are compared here. Passing the BOMs
    // as plain string literals is wrong for the marks that contain zero bytes:
    // a C string ends at its first '\0', so "\x00\x00\xFE\xFF" turns into an
    // empty prefix, which every input starts with, and binary data would then
    // be reported as starting with a BOM.
    return data.startsWith(bom3) ||
           data.startsWith(bom2a) || data.startsWith(bom2b) ||
           data.startsWith(bom4a) || data.startsWith(bom4b);
}

QByteArray removeBom(QByteArray& data)
{
if(data.left(3) == QByteArray("\xEF\xBB\xBF"))
if(data.startsWith(bom3))
{
QByteArray bom = data.left(3);
data.remove(0, 3);
return bom;
} else if(data.left(2) == QByteArray("\xFE\xFF") || data.left(2) == QByteArray("\xFF\xFE")) {
} else if(data.startsWith(bom2a) || data.startsWith(bom2b)) {
QByteArray bom = data.left(2);
data.remove(0, 2);
return bom;
} else if(data.left(4) == QByteArray("\x00\x00\xFE\xFF") || data.left(4) == QByteArray("\xFF\xFE\x00\x00")) {
} else if(data.startsWith(bom4a) || data.startsWith(bom4b)) {
QByteArray bom = data.left(4);
data.remove(0, 4);
return bom;
Expand Down

0 comments on commit feda408

Please sign in to comment.