Skip to content

Commit

Permalink
Sonnet: Spellcheck highlighter implementation (#1327)
Browse files Browse the repository at this point in the history
* Sonnet: Initial commit

* Sonnet: Remove Sonnet_export

* Sonnet: Fix build

* Sonnet: Fix build for Qt < 5.7

* Sonnet: Fix hunspell not found

* Sonnet: Fix mac and windows build

* Sonnet: Attempt fixing mac build

* Sonnet: Attempt Fixing Mac and win build

* Sonnet: Fix Mac build

* Fix Mac build

* Fix Mac build

* Sonnet: Attempt Fixing DLL problems windows

* Sonnet: Fix Windows Build

* Sonnet: Attempt fix windows build

* Sonnet: Attempt fix windows build

* Sonnet: Add more debug logging

* Sonnet: Add more dirs for dicts in windows

* Sonnet: Spellcheck highlighter implementation

* Sonnet: Fix windows build
  • Loading branch information
Waqar144 authored and pbek committed Oct 25, 2019
1 parent ed3c5b8 commit 5a36767
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 18 deletions.
159 changes: 142 additions & 17 deletions src/helpers/qownnotesmarkdownhighlighter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,16 @@ QOwnNotesMarkdownHighlighter::QOwnNotesMarkdownHighlighter(
Q_UNUSED(highlightingOptions)

spellchecker = new Sonnet::Speller();
languageFilter = new Sonnet::LanguageFilter(new Sonnet::SentenceTokenizer());
wordTokenizer = new Sonnet::WordTokenizer();
wordCount = 0;
errorCount = 0;
codeBlock = 0;

qDebug () <<"[Sonnet]Available Langs: "<< spellchecker->availableLanguages();
qDebug () <<"[Sonnet]Available Backend: "<< spellchecker->availableBackends();
qDebug () <<"[Sonnet]Available Dicts: "<< spellchecker->availableDictionaries();
qDebug () <<"[Sonnet]Available Lang names: "<< spellchecker->availableLanguageNames();
}

void QOwnNotesMarkdownHighlighter::updateCurrentNote() {
Expand Down Expand Up @@ -135,6 +145,46 @@ void QOwnNotesMarkdownHighlighter::highlightBrokenNotesLink(const QString& text)
* Spellchecker lives here
*/

//LanguageCache class
// * Copyright (C) 2004 Zack Rusin <zack@kde.org>
// * Copyright (C) 2006 Laurent Montel <montel@kde.org>
// * Copyright (C) 2013 Martin Sandsmark <martin.sandsmark@kde.org>
class LanguageCache : public QTextBlockUserData
{
public:
// Key: QPair<start, length>
// Value: language name
QMap<QPair<int, int>, QString> languages;

// Remove all cached language information after @p pos
void invalidate(int pos)
{
QMutableMapIterator<QPair<int, int>, QString> it(languages);
it.toBack();
while (it.hasPrevious()) {
it.previous();
if (it.key().first+it.key().second >= pos) {
it.remove();
} else {
break;
}
}
}

QString languageAtPos(int pos) const
{
// The data structure isn't really great for such lookups...
QMapIterator<QPair<int, int>, QString> it(languages);
while (it.hasNext()) {
it.next();
if (it.key().first <= pos && it.key().first + it.key().second >= pos) {
return it.value();
}
}
return QString();
}
};

/**
 * Returns the language currently reported by the spellchecker.
 */
QString QOwnNotesMarkdownHighlighter::currentLanguage() const {
    QString activeLanguage = spellchecker->language();
    return activeLanguage;
}
Expand All @@ -159,7 +209,7 @@ void QOwnNotesMarkdownHighlighter::setMisspelled(const int start, const int coun

void QOwnNotesMarkdownHighlighter::unsetMisspelled(int start, int count) {
//keep the existing format
QTextCharFormat format = QSyntaxHighlighter::format(start);
QTextCharFormat format = QSyntaxHighlighter::format(start+1);

//turn off the spell-check underline if it is turned on.
//Note: Don't use - format.fontUnderline() - to check whether
Expand All @@ -171,30 +221,105 @@ void QOwnNotesMarkdownHighlighter::unsetMisspelled(int start, int count) {
setFormat(start, count, format);
}

static bool hasNotEmptyText(const QString &text)
{
for (int i = 0; i < text.length(); ++i) {
if (!text.at(i).isSpace()) {
return true;
}
}
return false;
}

void QOwnNotesMarkdownHighlighter::highlightSpellChecking(const QString &text) {
//TODO: include other characters, for other languages
// add auto detection of languages
if (text == "```") {
codeBlock++;
}
if (codeBlock % 2 != 0) {
return;
}
if (!hasNotEmptyText(text)) {
return;
}
if (!spellchecker->isValid()) {
qDebug () << "[Sonnet]Spellchercher invalid!";
}

languageFilter->setBuffer(text);

qDebug () <<"[Sonnet]Available Langs: "<< spellchecker->availableLanguages();
qDebug () <<"[Sonnet]Available Backend: "<< spellchecker->availableBackends();
qDebug () <<"[Sonnet]Available Dicts: "<< spellchecker->availableDictionaries();
qDebug () <<"[Sonnet]Available Lang names: "<< spellchecker->availableLanguageNames();
LanguageCache *languageCache = dynamic_cast<LanguageCache*>(currentBlockUserData());
if (!languageCache) {
languageCache = new LanguageCache;
setCurrentBlockUserData(languageCache);
}

QRegularExpression regex("[a-zA-Z]+");
QRegularExpressionMatchIterator it = regex.globalMatch(text);
while(it.hasNext()){
QRegularExpressionMatch m = it.next();
QString word = m.captured();
bool isMisspelled = !word.isEmpty()
&& word.length() > 1
&& isWordMisspelled(word);
if(isMisspelled) {
setMisspelled(m.capturedStart(0), m.capturedEnd());
const bool autodetectLanguage = spellchecker->testAttribute(Sonnet::Speller::AutoDetectLanguage);
while (languageFilter->hasNext()) {
QStringRef sentence = languageFilter->next();
if (autodetectLanguage) {
QString lang;
QPair<int, int> spos = QPair<int, int>(sentence.position(), sentence.length());
// try cache first
if (languageCache->languages.contains(spos)) {
lang = languageCache->languages.value(spos);
} else {
lang = languageFilter->language();
if (!languageFilter->isSpellcheckable()) {
lang.clear();
}
languageCache->languages[spos] = lang;
}
if (lang.isEmpty()) {
continue;
}
qDebug () << "Sentence: " << sentence;
qDebug () << "Language detected: " << lang;
spellchecker->setLanguage(lang);
}
else {
unsetMisspelled(m.capturedStart(0), m.capturedEnd());

wordTokenizer->setBuffer(sentence.toString());
int offset = sentence.position();
while (wordTokenizer->hasNext()) {
QStringRef word = wordTokenizer->next();
if (!wordTokenizer->isSpellcheckable()) {
continue;
}
++wordCount;
if (spellchecker->isMisspelled(word.toString())) {
++errorCount;
qDebug () << "Word->Position + offset" << word.position() + offset;
qDebug () << "Word->length" << word.length();
setMisspelled(word.position()+offset, word.length());
} else {
//unsetMisspelled(word.position()+offset, word.length());
}
}
}

setCurrentBlockState(0);

/*
* Old implementation
* Will be removed later
*/
// QRegularExpression regex("[a-zA-Z]+");
// QRegularExpressionMatchIterator it = regex.globalMatch(text);
// while(it.hasNext()){
// QRegularExpressionMatch m = it.next();
// QString word = m.captured();
// bool isMisspelled = !word.isEmpty()
// && word.length() > 3
// && isWordMisspelled(word)
// && word != " ";
// if(isMisspelled) {
// setMisspelled(m.capturedStart(0), m.capturedEnd());
// }
// else {
// //disabling this for now because it's breaking the markdown highlighting
// //unsetMisspelled(m.capturedStart(0), m.capturedEnd());
// }
// }
}

7 changes: 7 additions & 0 deletions src/helpers/qownnotesmarkdownhighlighter.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
#endif

#include "libraries/sonnet/src/core/speller.h"
#include "libraries/sonnet/src/core/languagefilter_p.h"
#include "libraries/sonnet/src/core/tokenizer_p.h"

QT_BEGIN_NAMESPACE
class QTextDocument;
Expand Down Expand Up @@ -64,4 +66,9 @@ Q_OBJECT
void updateCurrentNote();

Sonnet::Speller *spellchecker;
Sonnet::LanguageFilter *languageFilter;
Sonnet::WordTokenizer *wordTokenizer;
int wordCount;
int errorCount;
int codeBlock;
};
2 changes: 2 additions & 0 deletions src/libraries/sonnet/src/core/guesslanguage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,8 @@ void GuessLanguagePrivate::loadModels()
QCoreApplication::applicationDirPath());
#endif
}
triMapFile = QStringLiteral("%1/trigrams.map").arg(
QCoreApplication::applicationDirPath());
qCDebug(SONNET_LOG_CORE) << "Loading trigrams from" << triMapFile;

QFile sin(triMapFile);
Expand Down
2 changes: 1 addition & 1 deletion src/libraries/sonnet/src/core/languagefilter_p.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#define LANGUAGEFILTER_H

#include <QString>
#include <tokenizer_p.h>
#include "tokenizer_p.h"
#include "sonnetcore_export.h"

namespace Sonnet {
Expand Down
1 change: 1 addition & 0 deletions src/libraries/sonnet/src/core/tokenizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#include <QList>
#include <QString>
#include <QDebug>

#include "tokenizer_p.h"
#include "textbreaks_p.h"
Expand Down

0 comments on commit 5a36767

Please sign in to comment.