Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix #269467: added stringutils class to support searching without accents, added tests in mtest
- Loading branch information
Showing
7 changed files
with
287 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
//============================================================================= | ||
// MuseScore | ||
// Linux Music Score Editor | ||
// Copyright (C) 2002-2018 Werner Schweer and others | ||
// | ||
// This program is free software; you can redistribute it and/or modify | ||
// it under the terms of the GNU General Public License version 2. | ||
// | ||
// This program is distributed in the hope that it will be useful, | ||
// but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
// GNU General Public License for more details. | ||
// | ||
// You should have received a copy of the GNU General Public License | ||
// along with this program; if not, write to the Free Software | ||
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
//============================================================================= | ||
|
||
#include "stringutils.h" | ||
|
||
namespace Ms { | ||
|
||
//--------------------------------------------------------- | ||
// removeLigatures | ||
//--------------------------------------------------------- | ||
|
||
/// Returns a copy of @p pre in which ligatures, umlauted vowels, the eszett
/// and the stroked O are spelled out with plain ASCII letters
/// (e.g. U+00C6 -> "Ae", U+00DF -> "ss", U+00D8 -> "O").
/// Every character that has no entry in the table below is kept unchanged.
QString stringutils::removeLigatures(const QString& pre)
{
    // One entry per special character: UTF-16 code unit -> ASCII spelling.
    // All replacements are pure ASCII, so no entry can re-trigger another one
    // and the order of the table is irrelevant.
    struct Substitution {
        unsigned short code;
        const char* ascii;
    };
    static const Substitution substitutions[] = {
        // Characters with dots above (Ä, ä, Ö, ö, Ü, ü)
        { 0x00C4, "Ae" },
        { 0x00E4, "ae" },
        { 0x00D6, "Oe" },
        { 0x00F6, "oe" },
        { 0x00DC, "Ue" },
        { 0x00FC, "ue" },

        // Latin Big Letter AA (Ꜳ) and Latin Small Letter aa (ꜳ)
        { 0xA732, "Aa" },
        { 0xA733, "aa" },

        // Latin Big Letter AE (Æ) and Latin Small Letter ae (æ)
        { 0x00C6, "Ae" },
        { 0x00E6, "ae" },

        // Latin Big Letter AO (Ꜵ) and Latin Small Letter ao (ꜵ)
        { 0xA734, "Ao" },
        { 0xA735, "ao" },

        // Latin Big Letter AU (Ꜷ) and Latin Small Letter au (ꜷ)
        { 0xA736, "Au" },
        { 0xA737, "au" },

        // IJ (Ĳ) and ij (ĳ)
        { 0x0132, "IJ" },
        { 0x0133, "ij" },

        // Eszett SS (ẞ) and ss (ß)
        { 0x1E9E, "SS" },
        { 0x00DF, "ss" },

        // O with stroke (Ø) and o with stroke (ø)
        { 0x00D8, "O" },
        { 0x00F8, "o" },

        // Big Letter OE (Œ) and small letter oe (œ)
        { 0x0152, "Oe" },
        { 0x0153, "oe" },

        // Big Letter OO (Ꝏ) and small letter oo (ꝏ)
        { 0xA74E, "Oo" },
        { 0xA74F, "oo" },

        // ue (ᵫ)
        { 0x1D6B, "ue" },
    };

    QString result = pre;
    // A plain character replacement is enough here; it avoids building and
    // compiling a QRegularExpression per table entry on every call.
    for (const Substitution& s : substitutions)
        result.replace(QChar(s.code), QString::fromLatin1(s.ascii));

    return result;
}
|
||
//--------------------------------------------------------- | ||
// removeDiacritics | ||
//--------------------------------------------------------- | ||
|
||
/// Returns a copy of @p pre without non-spacing marks.
/// The input is normalized to form KD, every character whose category is
/// QChar::Mark_NonSpacing is dropped, and the remainder is normalized to
/// form C before being returned.
QString stringutils::removeDiacritics(const QString& pre)
{
    const QString decomposed = pre.normalized(QString::NormalizationForm_KD);

    QString stripped;
    for (const QChar ch : decomposed) {
        if (ch.category() == QChar::Mark_NonSpacing)
            continue;               // skip the mark, keep everything else
        stripped.append(ch);
    }

    return stripped.normalized(QString::NormalizationForm_C);
}
|
||
} // namespace Ms |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
//============================================================================= | ||
// MuseScore | ||
// Linux Music Score Editor | ||
// Copyright (C) 2002-2018 Werner Schweer and others | ||
// | ||
// This program is free software; you can redistribute it and/or modify | ||
// it under the terms of the GNU General Public License version 2. | ||
// | ||
// This program is distributed in the hope that it will be useful, | ||
// but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
// GNU General Public License for more details. | ||
// | ||
// You should have received a copy of the GNU General Public License | ||
// along with this program; if not, write to the Free Software | ||
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
//============================================================================= | ||
|
||
#ifndef STRINGUTILS_H | ||
#define STRINGUTILS_H | ||
|
||
#include <QObject> | ||
|
||
namespace Ms { | ||
|
||
// Holder for static string helpers; both members are static, so no instance is needed to call them. | ||
class stringutils : public QObject | ||
{ | ||
Q_OBJECT | ||
|
||
public: | ||
// Returns a copy of `pre` with ligatures, umlauts, eszett and stroked O spelled out in ASCII letters. | ||
static QString removeLigatures(const QString& pre); | ||
// Returns a copy of `pre` with non-spacing marks removed (normalizes to form KD, filters, then to form C). | ||
static QString removeDiacritics(const QString& pre); | ||
}; | ||
|
||
} // namespace Ms | ||
|
||
#endif // STRINGUTILS_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#============================================================================= | ||
# MuseScore | ||
# Music Composition & Notation | ||
# $Id:$ | ||
# | ||
# Copyright (C) 2018 Werner Schweer | ||
# | ||
# This program is free software; you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License version 2 | ||
# as published by the Free Software Foundation and appearing in | ||
# the file LICENSE.GPL | ||
#============================================================================= | ||
|
||
# Name of this test target; presumably read by the included cmake.inc - confirm there. | ||
set(TARGET tst_stringutils) | ||
|
||
# Include mtest/cmake.inc from the project source tree (its contents are not shown here). | ||
include(${PROJECT_SOURCE_DIR}/mtest/cmake.inc) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
//============================================================================= | ||
// MuseScore | ||
// Music Composition & Notation | ||
// $Id:$ | ||
// | ||
// Copyright (C) 2018 Werner Schweer | ||
// | ||
// This program is free software; you can redistribute it and/or modify | ||
// it under the terms of the GNU General Public License version 2 | ||
// as published by the Free Software Foundation and appearing in | ||
// the file LICENSE.GPL | ||
//============================================================================= | ||
|
||
#include <QtTest/QtTest> | ||
#include <QVector> | ||
#include "mtest/testutils.h" | ||
#include "mscore/stringutils.h" | ||
|
||
#define DIR QString("stringutils/") | ||
|
||
using namespace Ms; | ||
|
||
//--------------------------------------------------------- | ||
// TestStringUtils | ||
//--------------------------------------------------------- | ||
|
||
// Qt Test fixture for the stringutils helpers; its private slots are run by the Qt Test framework. | ||
class TestStringUtils : public QObject, public MTest | ||
{ | ||
Q_OBJECT | ||
|
||
private slots: | ||
// Runs once before the first test function. | ||
void initTestCase(); | ||
// Exercises stringutils::removeDiacritics and stringutils::removeLigatures. | ||
void tst_stringutils(); | ||
}; | ||
|
||
//--------------------------------------------------------- | ||
// initTestCase | ||
//--------------------------------------------------------- | ||
|
||
// Qt Test calls a private slot named initTestCase() once before the first test function. | ||
// initMTest() is not defined in this file; presumably inherited from MTest (mtest/testutils.h) - confirm. | ||
void TestStringUtils::initTestCase() | ||
{ | ||
initMTest(); | ||
} | ||
|
||
//--------------------------------------------------------- | ||
// tst_stringutils | ||
//--------------------------------------------------------- | ||
|
||
void TestStringUtils::tst_stringutils() | ||
{ | ||
QString testEnglish("Test test"); | ||
QVERIFY(stringutils::removeDiacritics(testEnglish) == testEnglish); | ||
QVERIFY(stringutils::removeLigatures(testEnglish) == testEnglish); | ||
|
||
QString testNonLatin1("Πκολο"); | ||
QVERIFY(stringutils::removeDiacritics(testNonLatin1) == testNonLatin1); | ||
QVERIFY(stringutils::removeLigatures(testNonLatin1) == testNonLatin1); | ||
|
||
QString testNonLatin2("超倍低音长笛"); | ||
QVERIFY(stringutils::removeDiacritics(testNonLatin2) == testNonLatin2); | ||
QVERIFY(stringutils::removeLigatures(testNonLatin2) == testNonLatin2); | ||
|
||
QString testNonLatin3("פיקולו"); | ||
QVERIFY(stringutils::removeDiacritics(testNonLatin3) == testNonLatin3); | ||
QVERIFY(stringutils::removeLigatures(testNonLatin3) == testNonLatin3); | ||
|
||
QString testNonLatin4("پیکولو"); | ||
QVERIFY(stringutils::removeDiacritics(testNonLatin4) == testNonLatin4); | ||
QVERIFY(stringutils::removeLigatures(testNonLatin4) == testNonLatin4); | ||
|
||
const QVector<QChar> ligatureVector({ | ||
QChar(0xA732), | ||
QChar(0xA733), | ||
QChar(0x00C6), | ||
QChar(0x00C4), | ||
QChar(0x00E6), | ||
QChar(0x00E4), | ||
QChar(0xA734), | ||
QChar(0xA735), | ||
QChar(0xA736), | ||
QChar(0xA737), | ||
QChar(0x0132), | ||
QChar(0x0133), | ||
QChar(0x1E9E), | ||
QChar(0x00DF), | ||
QChar(0x00D8), | ||
QChar(0x00F8), | ||
QChar(0x0152), | ||
QChar(0x00D6), | ||
QChar(0x0153), | ||
QChar(0x00F6), | ||
QChar(0xA74E), | ||
QChar(0xA74F), | ||
QChar(0x00DC), | ||
QChar(0x00FC), | ||
QChar(0x1D6B)}); | ||
QString testLigatures; | ||
for (int i = 0; i < ligatureVector.size(); i++) { | ||
testLigatures.append(ligatureVector.at(i)); | ||
} | ||
QVERIFY(stringutils::removeLigatures(testLigatures) == QString("AaaaAeAeaeaeAoaoAuauIJijSSssOoOeOeoeoeOoooUeueue")); | ||
|
||
const QVector<QChar> diacriticVector({ | ||
QChar(0x00E9), // acute e | ||
QChar(0x00C9), // acute E | ||
QChar(0x00E8), // grave e | ||
QChar(0x00C8), // grave E | ||
QChar(0x00EA), // circumflex e | ||
QChar(0x00CA), // circumflex E | ||
QChar(0x00E7), // cedilla c | ||
QChar(0x00C7), // cedilla C | ||
QChar(0x00F1), // tilde n | ||
QChar(0x00D1), // tilde N | ||
QChar(0x00E5), // ring a | ||
QChar(0x00C5), // ring A | ||
QChar(0x010D), // caron c | ||
QChar(0x010C)}); // caron C | ||
QString testDiacritics; | ||
for (int i = 0; i < diacriticVector.size(); i++) { | ||
testDiacritics.append(diacriticVector.at(i)); | ||
} | ||
QVERIFY(stringutils::removeDiacritics(testDiacritics) == QString("eEeEeEcCnNaAcC")); | ||
} | ||
|
||
QTEST_MAIN(TestStringUtils) | ||
#include "tst_stringutils.moc" |