Skip to content

Commit

Permalink
properly decode HTML data according to declared charset for lite browser
Browse files Browse the repository at this point in the history
  • Loading branch information
martinrotter committed Dec 13, 2023
1 parent 8b91d47 commit 9c7f550
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 17 deletions.
62 changes: 53 additions & 9 deletions src/librssguard/gui/webviewers/qtextbrowser/textbrowserviewer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,13 @@
#include <QContextMenuEvent>
#include <QFileIconProvider>
#include <QScrollBar>
#include <QTextCodec>
#include <QTimer>
#include <QtConcurrent>

TextBrowserViewer::TextBrowserViewer(QWidget* parent)
: QTextBrowser(parent), m_resourcesEnabled(false), m_resourceDownloader(new Downloader(this)), m_loadedResources({}),
: QTextBrowser(parent), m_resourcesEnabled(false), m_resourceDownloader(new Downloader()),
m_resourceDownloaderThread(new QThread(this)), m_loadedResources({}),
m_placeholderImage(qApp->icons()->miscPixmap(QSL("image-placeholder"))),
m_placeholderImageError(qApp->icons()->miscPixmap(QSL("image-placeholder-error"))),
m_downloader(new Downloader(this)), m_document(new TextBrowserDocument(this)) {
Expand All @@ -38,17 +41,28 @@ TextBrowserViewer::TextBrowserViewer(QWidget* parent)
setResourcesEnabled(qApp->settings()->value(GROUP(Messages), SETTING(Messages::ShowResourcesInArticles)).toBool());
setDocument(m_document.data());

m_resourceDownloader->moveToThread(m_resourceDownloaderThread);
m_resourceDownloaderThread->start();

connect(this, &TextBrowserViewer::reloadDocument, this, [this]() {
const auto scr = verticalScrollBarPosition();
setHtmlPrivate(html(), m_currentUrl);
setVerticalScrollBarPosition(scr);
});

connect(m_resourceDownloader.data(), &Downloader::completed, this, &TextBrowserViewer::resourceDownloaded);
connect(m_resourceDownloader, &Downloader::completed, this, &TextBrowserViewer::resourceDownloaded);
connect(this, &QTextBrowser::anchorClicked, this, &TextBrowserViewer::onAnchorClicked);
connect(this, QOverload<const QUrl&>::of(&QTextBrowser::highlighted), this, &TextBrowserViewer::linkMouseHighlighted);
}

// Shuts down the resource-downloading worker thread and disposes of the
// downloader object that was moved into it.
TextBrowserViewer::~TextBrowserViewer() {
  // The downloader lives in the worker thread, so its deletion has to be
  // scheduled there. Post the deferred delete *before* asking the thread to
  // stop, so that it is handled while the thread is finishing.
  m_resourceDownloader->deleteLater();

  if (m_resourceDownloaderThread->isRunning()) {
    m_resourceDownloaderThread->quit();

    // quit() only asks the event loop to exit and returns immediately. The
    // QThread object is a child of this widget and gets destroyed right after
    // this destructor body; block until the thread has really finished so a
    // still-running QThread is never destroyed.
    m_resourceDownloaderThread->wait();
  }
}

QSize TextBrowserViewer::sizeHint() const {
auto doc_size = document()->size().toSize();

Expand Down Expand Up @@ -172,13 +186,17 @@ void TextBrowserViewer::setUrl(const QUrl& url) {
else {
QEventLoop loop;

connect(m_downloader.data(), &Downloader::completed, &loop, &QEventLoop::quit);
connect(m_downloader.data(),
&Downloader::completed,
&loop,
&QEventLoop::quit,
Qt::ConnectionType(Qt::ConnectionType::UniqueConnection | Qt::ConnectionType::AutoConnection));
m_downloader->manipulateData(url.toString(), QNetworkAccessManager::Operation::GetOperation, {}, 5000);

loop.exec();

const auto net_error = m_downloader->lastOutputError();
const QString content_type = m_downloader->lastContentType().toString();
const QString content_type = m_downloader->lastContentType();

if (net_error != QNetworkReply::NetworkError::NoError) {
is_error = true;
Expand All @@ -189,7 +207,7 @@ void TextBrowserViewer::setUrl(const QUrl& url) {
html_str = QSL("<img src=\"%1\">").arg(nonconst_url.toString());
}
else {
html_str = QString::fromUtf8(m_downloader->lastOutputData());
html_str = decodeHtmlData(m_downloader->lastOutputData(), content_type);
}
}
}
Expand All @@ -199,6 +217,22 @@ void TextBrowserViewer::setUrl(const QUrl& url) {
emit loadingFinished(!is_error);
}

// Converts raw response bytes to text using the charset declared in the
// Content-Type header value.
//
// data:         raw payload bytes to decode.
// content_type: Content-Type header value, e.g. "text/html; charset=windows-1250".
//
// Returns the decoded text. When no charset is declared, or no codec is known
// for the declared one, the data is decoded as UTF-8.
QString TextBrowserViewer::decodeHtmlData(const QByteArray& data, const QString& content_type) const {
  // Parameter names are matched case-insensitively and the value may be a
  // quoted string, so `Charset="UTF-8"` is accepted as well. The hyphen is
  // placed last in the class so it is unambiguously a literal.
  // The pattern is compiled once and reused by later calls.
  static const QRegularExpression charset_rgx("charset\\s*=\\s*[\"']?([0-9a-zA-Z_.:+-]+)",
                                              QRegularExpression::PatternOption::CaseInsensitiveOption);

  // Captured name is plain ASCII, so a locale-independent conversion is enough.
  const QByteArray charset_name = charset_rgx.match(content_type).captured(1).toLatin1();
  QTextCodec* codec = charset_name.isEmpty() ? nullptr : QTextCodec::codecForName(charset_name);

  if (codec == nullptr) {
    // No suitable codec for this encoding was found.
    // Use UTF-8.
    qWarningNN << LOGSEC_GUI << "Did not find charset for content-type" << QUOTE_W_SPACE_DOT(content_type);
    return QString::fromUtf8(data);
  }

  qDebugNN << LOGSEC_GUI << "Found charset for content-type" << QUOTE_W_SPACE_DOT(content_type);
  return codec->toUnicode(data);
}

// Returns the HTML text currently held in m_currentHtml.
QString TextBrowserViewer::html() const {
return m_currentHtml;
}
Expand Down Expand Up @@ -468,10 +502,20 @@ void TextBrowserViewer::downloadNextNeededResource() {
else {
QUrl res = m_neededResources.takeFirst();

m_resourceDownloader.data()->manipulateData(qApp->web()->unescapeHtml(res.toString()),
QNetworkAccessManager::Operation::GetOperation,
{},
5000);
QMetaObject::invokeMethod(m_resourceDownloader,
"manipulateData",
Qt::ConnectionType::QueuedConnection,
qApp->web()->unescapeHtml(res.toString()),
QNetworkAccessManager::Operation::GetOperation,
QByteArray(),
5000);

/*
m_resourceDownloader.data()->manipulateData(qApp->web()->unescapeHtml(res.toString()),
QNetworkAccessManager::Operation::GetOperation,
{},
5000);
*/
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class TextBrowserViewer : public QTextBrowser, public WebViewer {

public:
explicit TextBrowserViewer(QWidget* parent = nullptr);
virtual ~TextBrowserViewer();

QVariant loadOneResource(int type, const QUrl& name);

Expand Down Expand Up @@ -78,7 +79,7 @@ class TextBrowserViewer : public QTextBrowser, public WebViewer {
void resourceDownloaded(const QUrl& url,
QNetworkReply::NetworkError status,
int http_code,
const QByteArray &contents = QByteArray());
const QByteArray& contents = QByteArray());

signals:
void reloadDocument();
Expand All @@ -96,11 +97,14 @@ class TextBrowserViewer : public QTextBrowser, public WebViewer {
void setHtmlPrivate(const QString& html, const QUrl& base_url);
BlockingResult blockedWithAdblock(const QUrl& url);

QString decodeHtmlData(const QByteArray& data, const QString& content_type) const;

private:
QScopedPointer<Downloader> m_downloader;
bool m_resourcesEnabled;
QList<QUrl> m_neededResources; // All URLs here must be resolved.
QScopedPointer<Downloader> m_resourceDownloader;
Downloader* m_resourceDownloader;
QThread* m_resourceDownloaderThread;
QMap<QUrl, QByteArray> m_loadedResources; // All URLs here must be resolved.
QPixmap m_placeholderImage;
QPixmap m_placeholderImageError;
Expand Down
4 changes: 2 additions & 2 deletions src/librssguard/network-web/downloader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ void Downloader::finished() {
m_lastCookies = {};
}

m_lastContentType = reply->header(QNetworkRequest::KnownHeaders::ContentTypeHeader);
m_lastContentType = reply->header(QNetworkRequest::KnownHeaders::ContentTypeHeader).toString();
m_lastOutputError = reply->error();
m_lastHttpStatusCode = reply->attribute(QNetworkRequest::Attribute::HttpStatusCodeAttribute).toInt();
m_lastHeaders.clear();
Expand Down Expand Up @@ -382,7 +382,7 @@ QList<QNetworkCookie> Downloader::lastCookies() const {
return m_lastCookies;
}

QVariant Downloader::lastContentType() const {
QString Downloader::lastContentType() const {
return m_lastContentType;
}

Expand Down
4 changes: 2 additions & 2 deletions src/librssguard/network-web/downloader.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class Downloader : public QObject {
QByteArray lastOutputData() const;
QNetworkReply::NetworkError lastOutputError() const;
QList<HttpResponse> lastOutputMultipartData() const;
QVariant lastContentType() const;
QString lastContentType() const;
QList<QNetworkCookie> lastCookies() const;
int lastHttpStatusCode() const;
QMap<QString, QString> lastHeaders() const;
Expand Down Expand Up @@ -114,7 +114,7 @@ class Downloader : public QObject {
QList<HttpResponse> m_lastOutputMultipartData;
QNetworkReply::NetworkError m_lastOutputError;
int m_lastHttpStatusCode;
QVariant m_lastContentType;
QString m_lastContentType;
QList<QNetworkCookie> m_lastCookies;
QMap<QString, QString> m_lastHeaders;
};
Expand Down
4 changes: 2 additions & 2 deletions src/librssguard/network-web/networkfactory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ NetworkResult NetworkFactory::performNetworkOperation(const QString& url,
output = downloader.lastOutputData();

result.m_networkError = downloader.lastOutputError();
result.m_contentType = downloader.lastContentType().toString();
result.m_contentType = downloader.lastContentType();
result.m_cookies = downloader.lastCookies();
result.m_httpCode = downloader.lastHttpStatusCode();
result.m_headers = downloader.lastHeaders();
Expand Down Expand Up @@ -335,7 +335,7 @@ NetworkResult NetworkFactory::performNetworkOperation(const QString& url,
output = downloader.lastOutputMultipartData();

result.m_networkError = downloader.lastOutputError();
result.m_contentType = downloader.lastContentType().toString();
result.m_contentType = downloader.lastContentType();
result.m_cookies = downloader.lastCookies();
result.m_httpCode = downloader.lastHttpStatusCode();
result.m_headers = downloader.lastHeaders();
Expand Down

0 comments on commit 9c7f550

Please sign in to comment.