Skip to content

Commit

Permalink
wip save page state in session file
Browse files Browse the repository at this point in the history
With Qt 5.15, the underlying Chromium version switched to a new page
serialization format. Additionally, deserialization is much more fragile,
and QtWebEngine doesn't expose enough of an API to pull out the page
attributes needed to construct something that deserializes into a
working page. So now we attempt to dump the whole page state along with
the session. This should be backwards compatible: if you save a session
with this version of qutebrowser on 5.14 and then load it on 5.15, you
should still get your session history.

I have no idea how fragile the parsing is.

TODO:
cleanup
abstract this (make it work for WebKit)
test on other versions and document weirdnesses
add version number to session file and save backups on loading older ones?

ref #5359
  • Loading branch information
toofar committed Jun 27, 2021
1 parent d8ebdb2 commit 8891ce9
Show file tree
Hide file tree
Showing 5 changed files with 149 additions and 29 deletions.
32 changes: 17 additions & 15 deletions qutebrowser/browser/browsertab.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,7 +679,9 @@ class AbstractHistoryItem:
def __init__(self, url: QUrl, title: str, *, original_url: QUrl = None,
active: bool = False,
user_data: Dict[str, Any] = None,
last_visited: Optional[QDateTime] = None) -> None:
last_visited: Optional[QDateTime] = None,
page_state: Optional[bytes] = None,
) -> None:
self.url = url
if original_url is None:
self.original_url = url
Expand All @@ -689,6 +691,7 @@ def __init__(self, url: QUrl, title: str, *, original_url: QUrl = None,
self.active = active
self.user_data = user_data
self.last_visited = last_visited
self.page_state = page_state

def __repr__(self) -> str:
return utils.get_repr(self, constructor=True, url=self.url,
Expand All @@ -697,8 +700,12 @@ def __repr__(self) -> str:
last_visited=self.last_visited)

@classmethod
def from_qt(cls, qt_item: TypeHistoryItem,
active: bool = False) -> 'AbstractHistoryItem':
def from_qt(
cls,
qt_item: TypeHistoryItem,
active: bool = False,
page_state: Optional[bytes] = None,
) -> 'AbstractHistoryItem':
"""Convert `TypeHistoryItem` to `AbstractHistoryItem`."""
raise NotImplementedError

Expand All @@ -724,16 +731,7 @@ def __len__(self) -> int:
return len(self._history)

def __iter__(self) -> Iterator:
if self.to_load:
return iter(self.to_load)

return iter([
self._tab.history_item_from_qt(
item,
active=idx == self.current_idx()
)
for idx, item in enumerate(self._history.items())
])
raise NotImplementedError

def _check_count(self, count: int) -> None:
"""Check whether the count is positive."""
Expand Down Expand Up @@ -1243,7 +1241,8 @@ def navigation_blocked(self) -> bool:
@pyqtSlot(QUrl)
def _on_before_load_started(self, url: QUrl) -> None:
"""Adjust the title if we are going to visit a URL soon."""
qtutils.ensure_valid(url)
if not url.isValid():
return
url_string = url.toDisplayString()
log.webview.debug("Going to start loading: {}".format(url_string))
self.title_changed.emit(url_string)
Expand Down Expand Up @@ -1542,14 +1541,17 @@ def grab_pixmap(self, rect: QRect = None) -> Optional[QPixmap]:
return pic

def history_item_from_qt(self, item: TypeHistoryItem,
active: bool = False) -> AbstractHistoryItem:
active: bool = False,
page_state: Optional[bytes] = None,
) -> AbstractHistoryItem:
raise NotImplementedError

def new_history_item(
self, url: QUrl, original_url: QUrl,
title: str, active: bool,
user_data: Dict[str, Any],
last_visited: Optional[QDateTime],
page_state: Optional[bytes],
) -> AbstractHistoryItem:
"""Create `AbstractHistoryItem` from history item data."""
raise NotImplementedError
Expand Down
10 changes: 7 additions & 3 deletions qutebrowser/browser/webengine/tabhistory.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
# Qt 5.14 added version 4 which also serializes favicons:
# https://codereview.qt-project.org/c/qt/qtwebengine/+/279407
# We don't care about the favicons themselves, but we now write version 4
# and serialize an empty favicon URL for each item.
HISTORY_STREAM_VERSION = 3
HISTORY_STREAM_VERSION = 4


def _serialize_item(item, stream):
Expand Down Expand Up @@ -64,12 +64,13 @@ def _serialize_item(item, stream):

## QByteArray(encodedPageState.data(), encodedPageState.size());
# \xff\xff\xff\xff
qtutils.serialize_stream(stream, QByteArray())
qtutils.serialize_stream(stream, QByteArray(item.page_state))

## static_cast<qint32>(entry->GetTransitionType());
# chromium/ui/base/page_transition_types.h
# \x00\x00\x00\x00
stream.writeInt32(0) # PAGE_TRANSITION_LINK
# 0 = PAGE_TRANSITION_LINK, 8 = PAGE_TRANSITION_RELOAD
stream.writeInt32(8)

## entry->GetHasPostData();
# \x00
Expand Down Expand Up @@ -106,6 +107,9 @@ def _serialize_item(item, stream):
# \x00\x00\x00\xc8
stream.writeInt(200)

# favicon
qtutils.serialize_stream(stream, QUrl())


def serialize(items):
"""Serialize a list of WebHistoryItems to a data stream.
Expand Down
108 changes: 99 additions & 9 deletions qutebrowser/browser/webengine/webenginetab.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import dataclasses
import re
import html as html_utils
from typing import cast, Union, Optional
from typing import cast, Union, Optional, Iterator

from PyQt5.QtCore import (pyqtSlot, Qt, QPoint, QPointF, QUrl,
QObject, QFile, QIODevice, QTimer)
Expand Down Expand Up @@ -602,7 +602,7 @@ class WebEngineHistoryItem(browsertab.AbstractHistoryItem):
"""History item data derived from QWebEngineHistoryItem."""

@classmethod
def from_qt(cls, qt_item, active=False):
def from_qt(cls, qt_item, active=False, page_state=None):
"""Construct a WebEngineHistoryItem from a Qt history item.
Args:
Expand All @@ -617,6 +617,7 @@ def from_qt(cls, qt_item, active=False):
active=active,
user_data=None,
last_visited=qt_item.lastVisited(),
page_state=page_state,
)


Expand Down Expand Up @@ -660,10 +661,10 @@ def _load_items_workaround(self, items):
self._tab.load_url(url)

def load_items(self, items):
webengine_version = version.qtwebengine_versions().webengine
if webengine_version >= utils.VersionNumber(5, 15):
self._load_items_workaround(items)
return
#webengine_version = version.qtwebengine_versions().webengine
#if webengine_version >= utils.VersionNumber(5, 15):
# self._load_items_workaround(items)
# return

if items:
self._tab.before_load_started.emit(items[-1].url)
Expand All @@ -684,6 +685,77 @@ def _on_load_finished():
self._tab.load_finished.connect(_on_load_finished)


def _deserialize_tab_history(history):
    """Extract the serialized page state of every entry in a tab history.

    The history is streamed into an in-memory QDataStream and then read back
    field by field. Only the per-entry page state is kept; every other field
    is read solely to advance the stream to the next entry.

    Args:
        history: An object which can be written to a QDataStream via `<<`
                 (WebEngineHistory passes its underlying Qt history).

    Return:
        A list of QByteArray page states, one per entry, in stream order.

    Raises:
        AssertionError: If the stream reports an error while being set up or
                        written to, or if data is left over after all
                        entries were read (i.e. the layout assumed here does
                        not match what was serialized).
    """
    # Local import kept from the original: these names are only needed here.
    from PyQt5.QtCore import QDataStream, QByteArray, QIODevice

    data = QByteArray()
    stream = QDataStream(data, QIODevice.ReadWrite)
    assert stream.status() == QDataStream.Ok
    stream << history
    assert stream.status() == QDataStream.Ok
    # Rewind so that we read back what was just written.
    stream.device().seek(0)

    # Header: stream version, number of entries, index of the current entry.
    # Named stream_version so the module-level `version` import isn't
    # shadowed.
    stream_version = stream.readInt()
    count = stream.readInt()
    stream.readInt()  # current index, not needed here

    page_states = []
    for _ in range(count):
        stream >> QUrl()  # virtual URL

        # The title is a serialized QString, so it has to be consumed with
        # readQString(); readString() reads a char* string and only happens
        # to line up for non-null titles.
        stream.readQString()

        page_state = QByteArray()
        stream >> page_state
        page_states.append(page_state)

        stream.readInt32()  # transition type
        stream.readBool()  # has post data
        stream >> QUrl()  # referrer URL
        stream.readInt32()  # referrer policy
        stream >> QUrl()  # original request URL
        stream.readBool()  # is overriding user agent
        stream.readInt64()  # timestamp
        stream.readInt()  # HTTP status code

        if stream_version >= 4:
            # Version 4 additionally serializes a favicon URL per entry.
            stream >> QUrl()

    # If anything is left over, the field layout above was wrong.
    assert stream.atEnd()
    return page_states


class WebEngineHistory(browsertab.AbstractHistory):

"""QtWebEngine implementations related to page history."""
Expand All @@ -692,6 +764,21 @@ def __init__(self, tab):
super().__init__(tab)
self.private_api = WebEngineHistoryPrivate(tab)

def __iter__(self) -> Iterator:
    """Iterate over this tab's history items, including their page state.

    If there are items queued in `to_load`, those are iterated as-is.
    Otherwise, every item of the underlying history is converted via the
    tab's `history_item_from_qt`, with the item at `current_idx()` marked as
    active and the matching deserialized page state attached as bytes.
    """
    pending = self.to_load
    if pending:
        return iter(pending)

    states = _deserialize_tab_history(self._history)

    # Build the full list up-front, so the result is a snapshot.
    converted = []
    for position, qt_item in enumerate(self._history.items()):
        is_current = position == self.current_idx()
        converted.append(
            self._tab.history_item_from_qt(
                qt_item,
                active=is_current,
                page_state=bytes(states[position]),
            )
        )
    return iter(converted)

def load(self) -> None:
"""Load the tab history."""
super().load()
Expand Down Expand Up @@ -1486,18 +1573,21 @@ def _show_error_page(self, url, error):
self.set_html(error_page)

def history_item_from_qt(self, item: browsertab.TypeHistoryItem,
active: bool = False) -> WebEngineHistoryItem:
return WebEngineHistoryItem.from_qt(item, active)
active: bool = False,
page_state: Optional[bytes] = None,
) -> WebEngineHistoryItem:
return WebEngineHistoryItem.from_qt(item, active, page_state=page_state)

def new_history_item(self, url, original_url, title, active, user_data,
last_visited):
last_visited, page_state):
return WebEngineHistoryItem(
url=url,
original_url=original_url,
title=title,
active=active,
user_data=user_data,
last_visited=last_visited,
page_state=page_state,
)

@pyqtSlot(QUrl, 'QAuthenticator*', 'QString')
Expand Down
14 changes: 13 additions & 1 deletion qutebrowser/browser/webkit/webkittab.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import functools
import typing
import xml.etree.ElementTree
from typing import cast, Iterable, Optional
from typing import cast, Iterable, Optional, Iterator

from PyQt5.QtCore import pyqtSlot, Qt, QUrl, QPoint, QTimer, QSizeF, QSize
from PyQt5.QtGui import QIcon
Expand Down Expand Up @@ -688,6 +688,18 @@ def __init__(self, tab):
super().__init__(tab)
self.private_api = WebKitHistoryPrivate(tab)

def __iter__(self) -> Iterator:
    """Iterate over this tab's history items.

    If there are items queued in `to_load`, those are iterated as-is.
    Otherwise, every item of the underlying history is converted via the
    tab's `history_item_from_qt`, with the item at `current_idx()` marked as
    active.
    """
    pending = self.to_load
    if pending:
        return iter(pending)

    # Build the full list up-front, so the result is a snapshot.
    converted = []
    for position, qt_item in enumerate(self._history.items()):
        is_current = position == self.current_idx()
        converted.append(
            self._tab.history_item_from_qt(qt_item, active=is_current)
        )
    return iter(converted)


class WebKitElements(browsertab.AbstractElements):

Expand Down
14 changes: 13 additions & 1 deletion qutebrowser/misc/sessions.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import shutil
import pathlib
from typing import Any, Iterable, MutableMapping, MutableSequence, Optional, Union, cast
import base64

import yaml
from PyQt5.QtCore import Qt, QObject, QPoint, QTimer, QUrl, QDateTime
Expand Down Expand Up @@ -208,6 +209,10 @@ def _save_tab_item(self, tab, idx, item):

data['pinned'] = tab.data.pinned

data['page_state'] = None
if item.page_state:
data['page_state'] = base64.b64encode(item.page_state)

return data

def _save_tab(self, tab, active):
Expand All @@ -220,6 +225,7 @@ def _save_tab(self, tab, active):
data: _JsonType = {'history': []}
if active:
data['active'] = True

for idx, item in enumerate(tab.history):
item_data = self._save_tab_item(tab, idx, item)
data['history'].append(item_data)
Expand Down Expand Up @@ -387,13 +393,19 @@ def _load_tab(self, new_tab, data): # noqa: C901
else:
last_visited = None

page_state = b''
if 'page_state' in histentry:
page_state = base64.b64decode(histentry['page_state'])

entry = new_tab.new_history_item(
url=url,
original_url=orig_url,
title=histentry['title'],
active=active,
user_data=user_data,
last_visited=last_visited)
last_visited=last_visited,
page_state=page_state,
)
entries.append(entry)

if active:
Expand Down

0 comments on commit 8891ce9

Please sign in to comment.