Permalink
Browse files

Improve handling of BOMs in table cells

Detect some Unicode BOMs and always treat data starting with a BOM as
text. We might need to fine-tune this later but it should be an
improvement already.

In the Edit Dialog, remove the BOM from the text editor but keep it in
the hex editor. Also add it back to the text when saving changes in text
mode. This way, the BOM is out of the way for text edits but is not lost
when editing a cell.
  • Loading branch information...
MKleusberg committed Jan 1, 2018
1 parent 8f03124 commit 27c657902e354b11fb5778cc09518a95d37d3698
Showing with 59 additions and 7 deletions.
  1. +36 −0 src/Data.cpp
  2. +8 −0 src/Data.h
  3. +14 −7 src/EditDialog.cpp
  4. +1 −0 src/EditDialog.h
@@ -4,6 +4,10 @@

bool isTextOnly(QByteArray data, const QString& encoding, bool quickTest)
{
// If the data starts with a Unicode BOM, we always assume it is text
if(startsWithBom(data))
return true;

// Truncate to the first couple of bytes for quick testing
if(quickTest)
data = data.left(512);
@@ -15,3 +19,35 @@ bool isTextOnly(QByteArray data, const QString& encoding, bool quickTest)
// Perform check
return QString(data).toUtf8() == data;
}

bool startsWithBom(const QByteArray& data)
{
// Note that these aren't all possible BOMs. But they are probably the most common ones.

if(data.startsWith("\xEF\xBB\xBF") ||
data.startsWith("\xFE\xFF") || data.startsWith("\xFF\xFE") ||
data.startsWith("\x00\x00\xFE\xFF") || data.startsWith("\xFF\xFE\x00\x00"))
return true;
else
return false;
}

// Strips a leading Unicode byte order mark from data, if there is one.
//
// data: byte array to inspect; modified in place only when a BOM is found.
// Returns the removed BOM bytes, or a default-constructed QByteArray when data
// does not start with one of the recognised marks.
QByteArray removeBom(QByteArray& data)
{
    // All patterns carry an explicit length. The UTF-32 marks contain NUL
    // bytes, and the const char* overload of startsWith() would cut them off
    // at the first NUL: the BE mark would become an empty pattern that matches
    // everything, and the LE mark would shrink to the UTF-16 LE mark.
    static const QByteArray utf8Bom("\xEF\xBB\xBF", 3);
    static const QByteArray utf32BeBom("\x00\x00\xFE\xFF", 4);
    static const QByteArray utf32LeBom("\xFF\xFE\x00\x00", 4);
    static const QByteArray utf16BeBom("\xFE\xFF", 2);
    static const QByteArray utf16LeBom("\xFF\xFE", 2);

    int bomLength = 0;
    if(data.startsWith(utf8Bom)) {
        bomLength = utf8Bom.size();
    } else if(data.startsWith(utf32BeBom) || data.startsWith(utf32LeBom)) {
        // The 4-byte marks must be tested before the 2-byte ones because the
        // UTF-32 LE mark starts with the UTF-16 LE mark. Longest match wins.
        // NOTE(review): FF FE 00 00 could also be UTF-16 LE text whose first
        // character is U+0000; it is treated as UTF-32 LE here.
        bomLength = utf32BeBom.size();
    } else if(data.startsWith(utf16BeBom) || data.startsWith(utf16LeBom)) {
        bomLength = utf16BeBom.size();
    }

    // No BOM: leave data untouched and hand back an empty array
    if(bomLength == 0)
        return QByteArray();

    const QByteArray bom = data.left(bomLength);
    data.remove(0, bomLength);
    return bom;
}
@@ -9,4 +9,12 @@
// text but makes it less reliable
bool isTextOnly(QByteArray data, const QString& encoding = QString(), bool quickTest = false);

// This function returns true if the data in the data parameter starts with a Unicode BOM. Otherwise it returns false.
bool startsWithBom(const QByteArray& data);

// This function checks if the data in the data parameter starts with a Unicode BOM. If so, the BOM is removed from the
// byte array and passed back to the caller separately as the return value of the function. If the data does not start
// with a BOM an empty byte array is returned and the original data is not modified.
QByteArray removeBom(QByteArray& data);

#endif
@@ -93,6 +93,9 @@ void EditDialog::loadData(const QByteArray& data)
QImage img;
QString textData;

// Clear previously removed BOM
removedBom.clear();

// Determine the data type, saving that info in the class variable
dataType = checkDataType(data);

@@ -150,25 +153,28 @@ void EditDialog::loadData(const QByteArray& data)

case Text:
case JSON:

// Set enabled any of the text widgets
ui->editorText->setEnabled(true);
jsonEdit->setEnabled(true);

switch (editMode) {
case TextEditor:
{
// The text widget buffer is now the main data source
dataSource = TextBuffer;

// Load the text into the text editor
textData = QString::fromUtf8(data.constData(), data.size());
// Load the text into the text editor, remove BOM first if there is one
QByteArray dataWithoutBom = data;
removedBom = removeBom(dataWithoutBom);

textData = QString::fromUtf8(dataWithoutBom.constData(), dataWithoutBom.size());
ui->editorText->setPlainText(textData);

// Select all of the text by default
ui->editorText->selectAll();

break;

}
case JsonEditor:
// The JSON widget buffer is now the main data source
dataSource = JsonBuffer;
@@ -373,6 +379,7 @@ void EditDialog::setNull()
hexEdit->setData(QByteArray());
jsonEdit->clear();
dataType = Null;
removedBom.clear();

// Check if in text editor mode
int editMode = ui->editorStack->currentIndex();
@@ -425,10 +432,10 @@ void EditDialog::accept()
} else {
// It's not NULL, so proceed with normal text string checking
QString oldData = currentIndex.data(Qt::EditRole).toString();
QString newData = ui->editorText->toPlainText();
QString newData = removedBom + ui->editorText->toPlainText();
if (oldData != newData)
// The data is different, so commit it back to the database
emit recordTextUpdated(currentIndex, newData.toUtf8(), false);
emit recordTextUpdated(currentIndex, removedBom + newData.toUtf8(), false);
}
break;
case JsonBuffer:
@@ -509,7 +516,7 @@ void EditDialog::editModeChanged(int newMode)

case HexEditor: // Switching to the hex editor
// Convert the text widget buffer for the hex widget
hexEdit->setData(ui->editorText->toPlainText().toUtf8());
hexEdit->setData(removedBom + ui->editorText->toPlainText().toUtf8());

// The hex widget buffer is now the main data source
dataSource = HexBuffer;
@@ -57,6 +57,7 @@ private slots:
bool textNullSet;
bool isReadOnly;
bool mustIndentAndCompact;
QByteArray removedBom;

enum DataSources {
TextBuffer,

0 comments on commit 27c6579

Please sign in to comment.