diff --git a/newsfragments/109.feature b/newsfragments/109.feature
new file mode 100644
index 000000000..73a72120b
--- /dev/null
+++ b/newsfragments/109.feature
@@ -0,0 +1 @@
+initial implementation of a snapshot represented as a Tahoe-LAFS immutable directory
diff --git a/src/magic_folder/common.py b/src/magic_folder/common.py
index d107cd177..2dc68da70 100644
--- a/src/magic_folder/common.py
+++ b/src/magic_folder/common.py
@@ -90,12 +90,19 @@ def bad_response(url, response):
         body = yield readBody(response)
         raise BadResponseCode(url, response.code, body)
 
+
 def get_node_url(node_directory):
-    node_url_file = os.path.join(node_directory, u"node.url")
-    node_url = fileutil.read(node_url_file).strip()
+    """
+    :param str node_directory: A Tahoe client directory
+    :returns: the base URL for the given Tahoe client.
+    """
+    node_url_file = os.path.join(node_directory, u"node.url")
+    with open(node_url_file, "r") as f:
+        node_url = f.read().strip()
     return node_url
 
+
 @inlineCallbacks
 def tahoe_mkdir(nodeurl, treq):
     """
diff --git a/src/magic_folder/snapshot.py b/src/magic_folder/snapshot.py
new file mode 100644
index 000000000..5fe0d1b58
--- /dev/null
+++ b/src/magic_folder/snapshot.py
@@ -0,0 +1,740 @@
+# Copyright 2020 Least Authority TFA GmbH
+# See COPYING for details.
+
+"""
+Functions and types that implement snapshots
+"""
+from __future__ import print_function
+
+import io
+import os
+import time
+import json
+import base64
+from tempfile import mkstemp
+
+import attr
+import nacl
+
+from twisted.internet.defer import (
+    inlineCallbacks,
+    returnValue,
+)
+from twisted.web.client import (
+    BrowserLikeRedirectAgent,
+    FileBodyProducer,
+)
+
+from .common import (
+    get_node_url,
+)
+from .magic_folder import (
+    load_magic_folders,
+    save_magic_folders,
+)
+
+from twisted.web.client import (
+    readBody,
+)
+
+from twisted.web.http import (
+    OK,
+    CREATED,
+)
+
+from hyperlink import (
+    DecodedURL,
+)
+
+from .common import (
+    bad_response,
+)
+
+from eliot import (
+    start_action,
+    register_exception_extractor,
+)
+
+from nacl.signing import (
+    SigningKey,
+    VerifyKey,
+)
+from nacl.encoding import (
+    Base64Encoder,
+)
+
+# version of the snapshot scheme
+SNAPSHOT_VERSION = 1
+
+
+@attr.s
+class RemoteAuthor(object):
+    """
+    Represents the author of a RemoteSnapshot.
+
+    :ivar name: author's name
+
+    :ivar nacl.signing.VerifyKey verify_key: author's public key
+    """
+
+    name = attr.ib()
+    verify_key = attr.ib(validator=[attr.validators.instance_of(VerifyKey)])
+
+    def to_json(self):
+        """
+        :return: a representation of this author in a dict suitable for
+            JSON encoding (see also create_author_from_json)
+        """
+        return {
+            "name": self.name,
+            "verify_key": self.verify_key.encode(encoder=Base64Encoder),
+        }
+
+
+@attr.s
+class LocalAuthor(object):
+    """
+    Represents the author of a LocalSnapshot.
+
+    :ivar name: author's name
+
+    :ivar nacl.signing.SigningKey signing_key: author's private key
+    """
+
+    name = attr.ib()
+    signing_key = attr.ib(validator=[attr.validators.instance_of(SigningKey)])
+
+    # NOTE: this should not be converted to JSON or serialized
+    # (because it contains a private key); it is only for signing
+    # LocalSnapshot instances as they're uploaded. Convert to a
+    # RemoteAuthor for serialization.
+
+    @property
+    def verify_key(self):
+        """
+        :returns: the VerifyKey corresponding to our signing key
+        """
+        return self.signing_key.verify_key
+
+    def to_remote_author(self):
+        """
+        :returns: a RemoteAuthor instance.
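+        The RemoteAuthor has the same name, but carries only the
+        verify_key corresponding to our signing_key, so it is safe
+        to serialize. An illustrative round-trip, using only helpers
+        from this module::
+
+            alice = create_local_author("alice")
+            data = alice.to_remote_author().to_json()
+            # ... store or transmit `data` ...
+            remote = create_author_from_json(data)
+            assert remote.verify_key == alice.verify_key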
+        """
+        return create_author(self.name, self.signing_key.verify_key)
+
+
+def create_local_author(name):
+    """
+    Create a new local author with a freshly generated private
+    (signing) key. This author will not be saved on disk anywhere; see
+    `write_local_author` to do that.
+
+    :param name: the name of this author
+    """
+    signing_key = SigningKey.generate()
+    return LocalAuthor(
+        name,
+        signing_key,
+    )
+
+
+def write_local_author(local_author, magic_folder_name, config):
+    """
+    Write a LocalAuthor instance beside the other magic-folder data in
+    the node-directory.
+    """
+    key_fname = "magicfolder_{}.privkey".format(magic_folder_name)
+    path = config.get_config_path("private", key_fname)
+    keydata_base64 = local_author.signing_key.encode(encoder=Base64Encoder)
+    key_data = {
+        "author_name": local_author.name,
+        "author_private_key": keydata_base64,
+    }
+    with open(path, "w") as f:
+        json.dump(key_data, f)
+
+
+def create_local_author_from_config(config, name=None):
+    """
+    :param config: a Tahoe config instance (created via
+        `allmydata.client.read_config`)
+
+    :param name: which Magic Folder to use (or 'default')
+
+    :returns: a LocalAuthor instance from our configuration
+    """
+    # private keys go in "<node_dir>/private/magicfolder_<name>.privkey"
+    # to mirror where the sqlite database goes
+    if name is None:
+        name = "default"
+    nodedir = config.get_config_path()
+    magic_folders = load_magic_folders(nodedir)
+    if name not in magic_folders:
+        raise RuntimeError(
+            "No magic-folder named '{}'".format(name)
+        )
+
+    # if we don't have author information for this magic-folder yet,
+    # we need to create it .. so either throw a catch-able exception
+    # so the caller can do that, or just make one up here? I guess we
+    # could not have names at all for authors, which gets rid of the
+    # UI/UX concern about "where would an author name come from,
+    # anyway".
+
+    author_raw = config.get_private_config("magicfolder_{}.privkey".format(name))
+    author_data = json.loads(author_raw)
+
+    return LocalAuthor(
+        name=author_data[u"author_name"],
+        signing_key=SigningKey(
+            author_data[u"author_private_key"],
+            encoder=Base64Encoder,
+        ),
+    )
+
+
+def create_author(name, verify_key):
+    """
+    :param name: arbitrary name for this author
+
+    :param verify_key: a NaCl VerifyKey instance
+
+    :returns: a RemoteAuthor instance.
+    """
+    if not isinstance(verify_key, VerifyKey):
+        raise ValueError("verify_key must be a nacl.signing.VerifyKey")
+
+    return RemoteAuthor(
+        name=name,
+        verify_key=verify_key,
+    )
+
+
+def create_author_from_json(data):
+    """
+    :returns: a RemoteAuthor instance from the given data (which
+        would usually come from RemoteAuthor.to_json())
+    """
+    permitted_keys = required_keys = ["name", "verify_key"]
+    for k in data.keys():
+        if k not in permitted_keys:
+            raise ValueError(
+                u"Unknown RemoteAuthor key '{}'".format(k)
+            )
+    for k in required_keys:
+        if k not in data:
+            raise ValueError(
+                u"RemoteAuthor requires '{}' key".format(k)
+            )
+    verify_key = VerifyKey(data["verify_key"], encoder=Base64Encoder)
+    return create_author(data["name"], verify_key)
+
+
+def sign_snapshot(local_author, snapshot, content_capability):
+    """
+    Sign the given snapshot with the given author's key.
+
+    :param LocalAuthor local_author: the author (who must have a valid
+        signing key) signing this snapshot
+
+    :param LocalSnapshot snapshot: the snapshot to sign
+
+    :param str content_capability: the Tahoe immutable capability of
+        the actual snapshot data.
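+
+    The signed payload is the content capability and the snapshot
+    name, each terminated by a newline (this matches the body of this
+    function and of verify_snapshot_signature below)::
+
+        {content_capability}\n
+        {name}\n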
+
+    :returns: bytes representing the signature; an exception is
+        raised on error.
+    """
+    # XXX what do we sign? Should we hash it first? Ask our cryptographers
+    data_to_sign = (
+        u"{content_capability}\n"
+        u"{name}\n"
+    ).format(
+        content_capability=content_capability,
+        name=snapshot.name,
+    )
+    return local_author.signing_key.sign(data_to_sign.encode("utf8"))
+
+
+def verify_snapshot_signature(remote_author, alleged_signature, content_capability, snapshot_name):
+    """
+    Verify the given snapshot signature.
+
+    :returns: the verified data on success; raises
+        nacl.exceptions.BadSignatureError otherwise
+    """
+    # See comments about "data_to_sign" in sign_snapshot
+    data_to_verify = (
+        u"{content_capability}\n"
+        u"{name}\n"
+    ).format(
+        content_capability=content_capability,
+        name=snapshot_name,
+    )
+    return remote_author.verify_key.verify(
+        data_to_verify.encode("utf8"),
+        alleged_signature,
+    )
+
+
+# XXX see also comments about maybe a ClientSnapshot and a Snapshot or
+# so; "uploading" a ClientSnapshot turns it into a Snapshot.
+
+# XXX LocalSnapshot and RemoteSnapshot are both immutable python objects
+
+# XXX need to think about how to represent parents:
+# - "parents_raw" was meant to capture "we sometimes only have capability-strings"
+# - "lazy" loading ...
+# - what about LocalSnapshots? (we want them in parent lists but they have no capability yet)
+# - want to avoid recursively loading ALL the versions (until we need to)
+
+
+# XXX might want to think about an ISnapshot interface?
+# - stuff common between LocalSnapshot and RemoteSnapshot
+# - count_parents()
+# - fetch_parent()
+# - name, author, metadata, ...
+
+# XXX: (ram) do we need author info in LocalSnapshot? Isn't it relevant only in RemoteSnapshot?
+# XXX: THINK (ram) when will we get capability strings for parents instead of RemoteSnapshots?
+#      Never in the case of LocalSnapshots, because we will only be changing (extending) an
+#      existing LocalSnapshot in the offline usecase. In that case, our parent is another
+#      LocalSnapshot. In the case where our copy is older than that of another client and we do
+#      a fast-forward, we will be fetching RemoteSnapshots recursively until one of the parents
+#      is our current snapshot.
+@attr.s
+class LocalSnapshot(object):
+    name = attr.ib()
+    author = attr.ib()  # XXX must be "us" / have a signing-key
+    metadata = attr.ib()
+    content_path = attr.ib()  # full filesystem path to our stashed contents
+    parents_remote = attr.ib()  # DECIDE: are these RemoteSnapshots or just capability-strings?
+    parents_local = attr.ib()  # LocalSnapshot instances
+
+    def count_parents(self):
+        """
+        XXX or something
+        """
+        return len(self.parents_local) + len(self.parents_remote)
+
+    @inlineCallbacks
+    def fetch_parent(self, index, tahoe_client):
+        """
+        Fetch the given parent as a LocalSnapshot or RemoteSnapshot
+        instance -- possibly instantiating a RemoteSnapshot from a
+        capability-string.
+        """
+        raise NotImplementedError()
+        yield  # unreachable; makes this a generator, as @inlineCallbacks requires
+
+    def get_content_producer(self):
+        """
+        :returns: an IBodyProducer that gives you all the bytes of the
+            on-disc content. Raises an error if we already have a
+            capability.
+        """
+        # XXX or, maybe instead of .contents we want "a thing that
+        # produces file-like objects" so that e.g. if you call
+        # get_content_producer() twice it works..
+        return FileBodyProducer(
+            open(self.content_path, "rb")
+        )
+
+
+@attr.s
+class RemoteSnapshot(object):
+    """
+    Represents a snapshot corresponding to a particular version of a
+    file authored by a particular human.
+
+    :ivar name: the name of this Snapshot. This is a mangled path
+        relative to our local magic-folder path.
+
+    :ivar metadata: a dict containing metadata about this Snapshot.
+
+    :ivar parents_raw: list of capability-strings of our parents
+
+    :ivar author: SnapshotAuthor instance
+
+    :ivar capability: an immutable CHK:DIR2 capability-string.
+    """
+
+    name = attr.ib()
+    author = attr.ib()  # any SnapshotAuthor instance
+    metadata = attr.ib()
+    capability = attr.ib()
+    parents_raw = attr.ib()
+    content_cap = attr.ib()
+
+    def count_parents(self):
+        """
+        XXX or something
+        """
+        return len(self.parents_raw)
+
+    @property
+    def signature(self):
+        return self.metadata["author_signature"]
+
+    @inlineCallbacks
+    def fetch_parent(self, parent_index, tahoe_client):
+        """
+        Fetch the given parent.
+
+        :param int parent_index: which parent to fetch
+
+        :param tahoe_client: the Tahoe client to use to retrieve
+            capabilities
+
+        :returns: a Snapshot instance or raises an exception
+        """
+        assert 0 <= parent_index < len(self.parents_raw)
+        raise NotImplementedError()
+        yield  # unreachable; makes this a generator, as @inlineCallbacks requires
+
+    @inlineCallbacks
+    def fetch_content(self, tahoe_client, writable_file):
+        """
+        Fetch our content from the grid, writing it into writable_file.
+        """
+        yield tahoe_client.stream_capability(self.content_cap, writable_file)
+        # XXX returns some kind of streaming API to download the content
+        # XXX OR it just downloads all the content into memory and returns it?
+        # XXX OR you give this a file-like to WRITE into
+
+
+@inlineCallbacks
+def create_snapshot_from_capability(snapshot_cap, tahoe_client):
+    """
+    Create a RemoteSnapshot from a snapshot capability string.
+
+    :param tahoe_client: the Tahoe client to use
+
+    :param str snapshot_cap: the capability of the immutable CHK:DIR2
+        directory containing this snapshot.
+
+    :return Deferred[RemoteSnapshot]: a RemoteSnapshot instance on
+        success. Otherwise an appropriate exception is raised.
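+
+    The snapshot directory is expected to have the layout produced by
+    write_snapshot_to_tahoe (sketched here for reference; "parentN"
+    entries may be absent)::
+
+        {
+            "content": ["filenode", {"ro_uri": ..., "metadata": {...}}],
+            "author":  ["filenode", {"ro_uri": ..., "metadata": {...}}],
+            "parent0": ["dirnode",  {"ro_uri": ...}]
+        }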
+ """ + + action = start_action( + action_type=u"magic_folder:tahoe_snapshot:create_snapshot_from_capability", + ) + with action: + snapshot_json = yield tahoe_client.download_capability(snapshot_cap) + snapshot = json.loads(snapshot_json) + debug = json.dumps(snapshot, indent=4) + + # create SnapshotAuthor + author_cap = snapshot["author"][1]["ro_uri"] + author_json = yield tahoe_client.download_capability(author_cap) + snapshot_author = json.loads(author_json) + + author = create_author_from_json(snapshot_author) + + verify_key = VerifyKey(snapshot_author["verify_key"], Base64Encoder) + metadata = snapshot["content"][1]["metadata"]["magic_folder"] + + if "snapshot_version" not in metadata: + raise Exception( + "No 'snapshot_version' in snapshot metadata" + ) + if metadata["snapshot_version"] != SNAPSHOT_VERSION: + raise Exception( + "Unknown snapshot_version '{}' (not '{}')".format( + metadata["snapshot_version"], + SNAPSHOT_VERSION, + ) + ) + + name = metadata["name"] + content_cap = snapshot["content"][1]["ro_uri"] + + # verify the signature + signature = base64.b64decode(metadata["author_signature"]) + verify_snapshot_signature(author, signature, content_cap, name) + + # find all parents + parents = [k for k in snapshot.keys() if k.startswith('parent')] + parent_caps = [snapshot[parent][1]["ro_uri"] for parent in parents] + + returnValue( + RemoteSnapshot( + name=name, + author=create_author( + name=snapshot_author["name"], + verify_key=verify_key, + ), + metadata=metadata, + content_cap=content_cap, + parents_raw=parent_caps, # XXX: This needs to be populated + capability=snapshot_cap.decode("ascii"), + ) + ) + + +@inlineCallbacks +def create_snapshot(name, author, data_producer, snapshot_stash_dir, parents): + """ + Creates a new LocalSnapshot instance that is in-memory only (call + write_snapshot_to_tahoe() to commit it to a grid). Actually not + in-memory, we should commit it to local disk / database before + ever returning LocalSnapshot instance + + :param author: SnapshotAuthor which must have a valid signing-key + + :param data_producer: file-like object that can read + + XXX file-like, okay, does it need to support random-access? just + skip-ahead? none of that? Should we pass a 'way to create a + file-like producer' instead (so e.g. we don't even open the file + if we never look at the content)? + + XXX thinking of local-first, the data_producer here is used just + once (immediately) to copy all the data into some "staging" area + local to our node-dir (or at least specified in our confing) + .. then we have a canonical full path we can "burn in" to the + LocalSnapshot and it can produce new readers on-demand. + """ + yield + + if not isinstance(author, LocalAuthor): + raise ValueError( + "LocalSnapshot author must be LocalAuthor instance" + ) + + parents_remote = [] + parents_local = [] + for idx, parent in enumerate(parents): + if isinstance(parent, LocalSnapshot): + parents_local.append(parent) + elif isinstance(parent, RemoteSnapshot): + parents_remote.append(parent) + else: + raise ValueError( + "Parent {} is type {} not LocalSnapshot or RemoteSnapshot".format( + idx, + type(parent), + ) + ) + + chunk_size = 1024*1024 # 1 MiB + # 1. create a temp-file in our stash area + temp_file_fd, temp_file_name = mkstemp( + prefix="snap", + dir=snapshot_stash_dir, + ) + try: + # 2. 
+        data = data_producer.read(chunk_size)
+        while data:
+            os.write(temp_file_fd, data)
+            data = data_producer.read(chunk_size)
+    finally:
+        os.close(temp_file_fd)
+
+    # XXX FIXME write snapshot meta-information (including the path
+    # temp_file_name) into the snapshot database. TBD
+
+    now = time.time()
+    returnValue(
+        LocalSnapshot(
+            name=name,
+            author=author,
+            # XXX look at how this becomes metadata in RemoteSnapshot, etc
+            # .. we want to overwrite the signature (or .. only add it if
+            # missing?)
+            metadata={
+                "ctime": now,
+                "mtime": now,
+#                "magic_folder": {
+#                    "author_signature": "pending",
+#                }
+            },
+            content_path=temp_file_name,
+            parents_remote=parents_remote,
+            parents_local=parents_local,
+        )
+    )
+
+# XXX THINK
+# how to do parents?
+#
+# - LocalSnapshot can only have RemoteSnapshots as parents?
+#   -> we could allow both, and the "upload" function has to recursively upload parents
+# - RemoteSnapshots only have RemoteSnapshots as parents
+#
+# offline-first?
+# - can we build up multiple LocalSnapshots and upload them later?
+# - can we do ^ but maintain them across client re-starts?
+# - ideal use-case is:
+#   - build a bunch of LocalSnapshots
+#   - shut down daemon
+#   - restart daemon (series of LocalSnapshots still there)
+#   - upload LocalSnapshots, making them RemoteSnapshots
+#   - have to 'stash' actual contents somewhere (maybe /.stash/*)
+# - huge PRO of doing ^ first is that our client can crash and not lose snapshots
+# - then LocalSnapshot can have LocalSnapshot instances in parents list
+#
+# tahoe as a library?
+# - can we start with TahoeClient and build out?
+# - can TahoeClient remain in this repo, get promoted later?
+# - ...
+
+
+@inlineCallbacks
+def write_snapshot_to_tahoe(snapshot, author_key, tahoe_client):
+    """
+    Write a LocalSnapshot object to the given tahoe grid. Will also
+    (recursively) upload any LocalSnapshot parents.
+
+    :param LocalSnapshot snapshot: the snapshot to upload.
+
+    :param LocalAuthor author_key: the author whose signing key will
+        sign this snapshot (and also any LocalSnapshots that are
+        parents of this one).
+
+    :returns: a RemoteSnapshot instance
+    """
+    # XXX probably want to give this a progress= instance (kind of
+    # like the one in Tahoe) so we can track upload progress for
+    # status-API for GUI etc.
+
+    # XXX might want to put all this stuff into a queue so we only
+    # upload X at a time etc. -- that is, the "real" API is probably a
+    # high-level one that just queues up the upload .. a low level one
+    # "actually does it", including re-tries etc. Currently, this
+    # function is both of those.
+
+    parents_raw = []  # raw capability strings
+
+    if len(snapshot.parents_remote):
+        for parent in snapshot.parents_remote:
+            parents_raw.append(parent.capability)
+
+    # we can't reference any LocalSnapshot objects we have, so they
+    # must be uploaded first .. we do this up front so we're also
+    # uploading the actual content of the parents first.
+    if len(snapshot.parents_local):
+        # if a parent is a RemoteSnapshot, we are sure that its parents
+        # are themselves RemoteSnapshots. Recursively upload local
+        # parents first.
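+        # Illustrative upload order: given a local parent L2 that itself
+        # has a local parent L1, plus an already-uploaded remote parent R0,
+        #
+        #     R0 (remote)    L1 <- L2 <- snapshot (local)
+        #
+        # the recursion commits L1, then L2, then this snapshot, so every
+        # "parentN" entry below holds a real capability-string.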
+        to_upload = snapshot.parents_local[:]  # shallow-copy the thing we'll iterate
+        for parent in to_upload:
+            parent_remote_snapshot = yield write_snapshot_to_tahoe(parent, author_key, tahoe_client)
+            parents_raw.append(parent_remote_snapshot.capability)
+            snapshot.parents_local.remove(parent)  # the shallow-copy to_upload is not affected
+
+    # upload the content itself
+    content_cap = yield tahoe_client.create_immutable(snapshot.get_content_producer())
+
+    # sign the snapshot (which can only happen after we have the content-capability)
+    author_signature = sign_snapshot(author_key, snapshot, content_cap)
+    author_signature_base64 = base64.b64encode(author_signature.signature)
+    author_data = snapshot.author.to_remote_author().to_json()
+
+    author_cap = yield tahoe_client.create_immutable(
+        json.dumps(author_data)
+    )
+    # print("author_cap: {}".format(author_cap))
+
+    # create the actual snapshot: an immutable directory with
+    # some children:
+    # - "content" -> RO cap (arbitrary data)
+    # - "author" -> RO cap (json)
+    # - "parent0" -> RO cap to a Snapshot
+    # - "parentN" -> RO cap to a Snapshot
+
+    # XXX actually, should we make the parent pointers a sub-dir,
+    # maybe? that might just be extra complexity for no gain, but
+    # "parents/0", "parents/1" aesthetically seems a bit nicer.
+
+    # XXX FIXME timestamps are bogus
+
+    content_metadata = {
+        "snapshot_version": SNAPSHOT_VERSION,
+        "name": snapshot.name,
+        "author_signature": author_signature_base64,
+    }
+    data = {
+        "content": [
+            "filenode", {
+                "ro_uri": content_cap,
+                "metadata": {
+                    "ctime": 1202777696.7564139,
+                    "mtime": 1202777696.7564139,
+                    "magic_folder": content_metadata,
+                    "tahoe": {
+                        "linkcrtime": 1202777696.7564139,
+                        "linkmotime": 1202777696.7564139
+                    }
+                }
+            },
+        ],
+        "author": [
+            "filenode", {
+                "ro_uri": author_cap,
+                "metadata": {
+                    "ctime": 1202777696.7564139,
+                    "mtime": 1202777696.7564139,
+                    "tahoe": {
+                        "linkcrtime": 1202777696.7564139,
+                        "linkmotime": 1202777696.7564139
+                    }
+                }
+            }
+        ],
+    }
+
+    # XXX 'parents_raw' are just Tahoe capability-strings for now
+    for idx, parent_cap in enumerate(parents_raw):
+        data[u"parent{}".format(idx)] = [
+            "dirnode", {
+                "ro_uri": parent_cap,
+                # is not having "metadata" permitted?
+                # (ram) Yes, looks like.
+            }
+        ]
+
+    # print("data: {}".format(data))
+    snapshot_cap = yield tahoe_client.create_immutable_directory(data)
+
+    # XXX *now* is the moment we can remove the LocalSnapshot from our
+    # local database -- so if at any moment before now there's a
+    # failure, we'll try again.
+    returnValue(
+        RemoteSnapshot(
+            # XXX: we are copying over the name from LocalSnapshot; it is
+            # not stored on tahoe at the moment. This means that when we
+            # read back a snapshot we cannot create a RemoteSnapshot object
+            # from a cap string alone.
+            name=snapshot.name,
+            author=create_author(  # drop the signing_key; it doesn't make sense on remote snapshots
+                name=snapshot.author.name,
+                verify_key=snapshot.author.verify_key,
+            ),
+            metadata=content_metadata,  # XXX not authenticated by signature...
+            parents_raw=parents_raw,  # XXX FIXME (at this point, will have parents' immutable caps .. parents don't work yet)
+            capability=snapshot_cap.decode("ascii"),
+            content_cap=content_cap,
+        )
+    )
+
+
+class TahoeWriteException(Exception):
+    """
+    Something went wrong while doing a `tahoe put`.
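+
+    :ivar int code: the HTTP status code of the failed write
+
+    :ivar body: the HTTP response body of the failed write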
+ """ + def __init__(self, code, body): + self.code = code + self.body = body + + def __str__(self): + return ''.format( + self.code, + self.body, + ) + + +# log exception caused while doing a tahoe put API +register_exception_extractor(TahoeWriteException, lambda e: {"code": e.code, "body": e.body }) + + +# XXX FIXME +# use TahoeWriteException in write_snapshot_to_tahoe +# body = yield readBody(response) +# raise TahoeWriteException(response.code, body) diff --git a/src/magic_folder/tahoe_client.py b/src/magic_folder/tahoe_client.py new file mode 100644 index 000000000..a654c7eb9 --- /dev/null +++ b/src/magic_folder/tahoe_client.py @@ -0,0 +1,123 @@ +# Copyright 2020 Least Authority TFA GmbH +# See COPYING for details. + +import json + +from twisted.internet.defer import ( + inlineCallbacks, + returnValue, +) +from twisted.web.client import ( + FileBodyProducer, +) +from hyperlink import ( + DecodedURL, +) + +import attr + +from .common import ( + get_node_url, +) + + +@attr.s +class TahoeClient(object): + """ + An object that knows how to call a particular tahoe client's + WebAPI. Usually this means a node-directory (to get the base URL) + and a treq client (to make HTTP requests). + """ + + # node_directory = attr.ib() + url = attr.ib() + http_client = attr.ib() + + @inlineCallbacks + def create_immutable_directory(self, directory_data): + post_uri = self.url.replace( + path=(u"uri",), + query=[(u"t", u"mkdir-immutable")], + ) + res = yield self.http_client.post( + post_uri.to_text(), + json.dumps(directory_data), + ) + capability_string = yield res.content() + returnValue( + capability_string.strip() + ) + + @inlineCallbacks + def create_immutable(self, producer): + """ + :param producer: can take anything that treq's data= method to + treq.request allows which is currently: str, file-like or + IBodyProducer. See + https://treq.readthedocs.io/en/release-20.3.0/api.html#treq.request + """ + + put_uri = self.url.replace( + path=(u"uri",), + query=[(u"mutable", u"false")], + ) + res = yield self.http_client.put( + put_uri.to_text(), + data=producer, + ) + capability_string = yield res.content() + returnValue( + capability_string.strip() + ) + + @inlineCallbacks + def download_capability(self, cap): + get_uri = self.url.replace( + path=(u"uri",), + query=[(u"uri", cap.decode("ascii"))], + ) + res = yield self.http_client.get(get_uri.to_text()) + data = yield res.content() + returnValue(data) + + @inlineCallbacks + def stream_capability(self, cap, filelike): + get_uri = self.url.replace( + path=(u"uri",), + query=[(u"uri", cap.decode("ascii"))], + ) + res = yield self.http_client.get(get_uri.to_text()) + yield res.collect(filelike.write) + + +@inlineCallbacks +def create_tahoe_client(node_directory, treq_client=None): + """ + Create a new TahoeClient instance that is speaking to a particular + Tahoe node. + + XXX is treq_client= enough of a hook to get a 'testing' treq + client?. + """ + + # real: + # client = create_tahoe_client(tmpdir) + + # testing: + # root = create_fake_tahoe_root() + # client = create_tahoe_client(tmpdir, treq_client=create_tahoe_treq_client(root)) + + # from allmydata.node import read_config ?? + base_url = get_node_url(node_directory) + url = DecodedURL.from_text(base_url) + + if treq_client is None: + treq_client = HTTPClient( + agent=BrowserLikeRedirectAgent(), + ) + client = TahoeClient( + url=url, + http_client=treq_client, + ) + yield # maybe we want to at least try getting / to see if it's alive? 
+ returnValue(client) diff --git a/src/magic_folder/test/matchers.py b/src/magic_folder/test/matchers.py index eaf7d13a5..a5248f31e 100644 --- a/src/magic_folder/test/matchers.py +++ b/src/magic_folder/test/matchers.py @@ -2,6 +2,11 @@ Testtools-style matchers useful to the Tahoe-LAFS test suite. """ +import base64 +from nacl.exceptions import ( + BadSignatureError, +) + import attr from testtools.matchers import ( @@ -61,6 +66,37 @@ def match(self, other): return Mismatch("The signature did not verify.") +@attr.s +class MatchesAuthorSignature(object): + """ + FIXME + """ + snapshot = attr.ib() # LocalSnapshot + remote_snapshot = attr.ib() +## capability = attr.ib() # RemoteSnapshot's capability-string + + def match(self, other): + """ + FIXME + """ + # "other" is the RemoteSnapshot's signature + # XXX need the capability-string of the RemoteSnapshot + public_key = self.snapshot.author.verify_key + alleged_sig = base64.b64decode(self.remote_snapshot.signature) + signed_data = ( + u"{content_capability}\n" + u"{name}\n" + ).format( + content_capability=self.remote_snapshot.content_cap, + name=self.remote_snapshot.name, + ).encode("utf8") + + try: + public_key.verify(signed_data, alleged_sig) + except BadSignatureError: + return Mismatch("The signature did not verify.") + + def matches_storage_announcement(basedir, anonymous=True, options=None): """ Match a storage announcement. diff --git a/src/magic_folder/test/strategies.py b/src/magic_folder/test/strategies.py index 512d79662..1a89f657d 100644 --- a/src/magic_folder/test/strategies.py +++ b/src/magic_folder/test/strategies.py @@ -247,3 +247,6 @@ def an_item(path, progress, size, when): integers(min_value=0), integers(min_value=0, max_value=2 ** 31 - 1), ) + +def magic_folder_filenames(): + return text(min_size=1) diff --git a/src/magic_folder/test/test_magic_folder.py b/src/magic_folder/test/test_magic_folder.py index b6d8a5a23..2da17a32b 100644 --- a/src/magic_folder/test/test_magic_folder.py +++ b/src/magic_folder/test/test_magic_folder.py @@ -9,12 +9,14 @@ from twisted.internet import defer, task, reactor from twisted.python.runtime import platform from twisted.python.filepath import FilePath +from twisted.web.client import Agent from testtools.matchers import ( Not, Is, ContainsDict, Equals, + StartsWith, ) from eliot import ( @@ -45,6 +47,14 @@ inline_callbacks, ) +from treq.client import ( + HTTPClient, +) + +from .fixtures import ( + SelfConnectedClient, +) + from magic_folder.util.eliotutil import ( log_call_deferred, ) @@ -76,6 +86,11 @@ AsyncTestCase, skipIf, ) + +from .cli.common import ( + cli, +) + from .cli.test_magic_folder import MagicFolderCLITestMixin _debug = False diff --git a/src/magic_folder/test/test_snapshot.py b/src/magic_folder/test/test_snapshot.py new file mode 100644 index 000000000..36670aff4 --- /dev/null +++ b/src/magic_folder/test/test_snapshot.py @@ -0,0 +1,471 @@ +import io +import os +import json +import base64 +from tempfile import mktemp +from shutil import rmtree +from functools import partial + +from nacl.signing import ( + SigningKey, + VerifyKey, +) +from nacl.exceptions import ( + BadSignatureError, +) + +from testtools import ( + TestCase, + ExpectedException, +) +from testtools.matchers import ( + Equals, + Raises, + MatchesStructure, + Always, + AfterPreprocessing, + StartsWith, + IsInstance, +) + +from testtools.twistedsupport import ( + succeeded, + failed, +) + +from hypothesis import ( + given, +) +from hypothesis.strategies import ( + binary, + text, +) + +from twisted.internet import 
defer +from twisted.python.filepath import ( + FilePath, +) +from twisted.web.resource import ( + Resource, +) +from twisted.web.client import ( + Agent, + FileBodyProducer, +) + +from treq.client import ( + HTTPClient, +) +from treq.testing import ( + RequestTraversalAgent, + RequestSequence, + StubTreq, + _SynchronousProducer, # FIXME copy code somewhere, "because private" +) +from allmydata.testing.web import ( + create_tahoe_treq_client, + create_fake_tahoe_root, +) +from allmydata.node import ( + read_config, +) + +from hyperlink import ( + DecodedURL, +) + +from .matchers import ( + MatchesAuthorSignature, +) +from .fixtures import ( + NodeDirectory, +) +from .common import ( + ShouldFailMixin, + SyncTestCase, + AsyncTestCase, + skipIf, +) +from .strategies import ( + magic_folder_filenames, +) +from magic_folder.snapshot import ( + create_author, + create_local_author, + create_local_author_from_config, + write_local_author, + create_author_from_json, + create_snapshot, + create_snapshot_from_capability, + write_snapshot_to_tahoe, +) +from magic_folder.tahoe_client import ( + TahoeClient, +) + + +class TestSnapshotAuthor(AsyncTestCase): + """ + """ + def setUp(self): + """ + We have Alices's signing+verify key but only a verify key for Bob + (in the SnapshotAuthor instance) + """ + d = super(TestSnapshotAuthor, self).setUp() + self.alice = create_local_author("alice") + return d + + def test_author_serialize(self): + js = self.alice.to_remote_author().to_json() + alice2 = create_author_from_json(js) + + self.assertThat( + alice2, + MatchesStructure( + name=Equals(self.alice.name), + verify_key=Equals(self.alice.verify_key), + ) + ) + + def test_author_serialize_extra_data(self): + js = { + "name": "wrong", + "invalid_key": 42, + } + with ExpectedException(ValueError, ".*key 'invalid_key'.*"): + create_author_from_json(js) + + def test_author_serialize_missing_data(self): + js = { + "name": "foo", + # mising verify_key + } + with ExpectedException(ValueError, ".*requires 'verify_key'.*"): + create_author_from_json(js) + + +class TahoeSnapshotTest(TestCase): + """ + Tests for the snapshots + """ + + @defer.inlineCallbacks + def setUp(self): + """ + Set up a fake Tahoe client via treq, a temporary local author and + a stash directory + """ + super(TahoeSnapshotTest, self).setUp() + self.root = create_fake_tahoe_root() + self.http_client = yield create_tahoe_treq_client(self.root) + self.tahoe_client = TahoeClient( + url=DecodedURL.from_text(u"http://example.com"), + http_client=self.http_client, + ) + self.alice = create_local_author("alice") + self.stash_dir = mktemp() + os.mkdir(self.stash_dir) + + def tearDown(self): + super(TahoeSnapshotTest, self).tearDown() + rmtree(self.stash_dir) + + def _download_content(self, snapshot_cap): + d = self.tahoe_client.download_capability(snapshot_cap) + data = json.loads(d.result) + content_cap = data["content"][1]["ro_uri"] + sig = data["content"][1]["metadata"]["magic_folder"]["author_signature"] + # XXX is it "testtools-like" to check the signature here too? + return self.tahoe_client.download_capability(content_cap) + + @given( + content=binary(min_size=1), + filename=magic_folder_filenames(), + ) + def test_create_new_tahoe_snapshot(self, content, filename): + """ + create a new snapshot (this will have no parent snapshots). 
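+        The local snapshot is then committed to the fake (treq-based)
+        Tahoe grid; we check that the uploaded content matches and that
+        the author's signature verifies.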
+ """ + data = io.BytesIO(content) + + snapshots = [] + d = create_snapshot( + name=filename, + author=self.alice, + data_producer=data, + snapshot_stash_dir=self.stash_dir, + parents=[], + ) + d.addCallback(snapshots.append) + self.assertThat( + d, + succeeded(Always()), + ) + + d = write_snapshot_to_tahoe(snapshots[0], self.alice, self.tahoe_client) + self.assertThat( + d, + succeeded( + MatchesStructure( + name=Equals(snapshots[0].name), + capability=AfterPreprocessing( + self._download_content, + succeeded(Equals(data.getvalue())), + ), + signature=MatchesAuthorSignature(snapshots[0], d.result), + ), + ), + ) + + @given( + content1=binary(min_size=1), + content2=binary(min_size=1), + filename=magic_folder_filenames(), + ) + def test_create_local_snapshots(self, content1, content2, filename): + """ + Create a local snapshot and then change the content of the file + to make another snapshot. + """ + data1 = io.BytesIO(content1) + parents = [] + + d = create_snapshot( + name=filename, + author=self.alice, + data_producer=data1, + snapshot_stash_dir=self.stash_dir, + parents=[], + ) + d.addCallback(parents.append) + self.assertThat( + d, + succeeded(Always()), + ) + + data2 = io.BytesIO(content2) + d = create_snapshot( + name=filename, + author=self.alice, + data_producer=data2, + snapshot_stash_dir=self.stash_dir, + parents=parents, + ) + d.addCallback(parents.append) + self.assertThat( + d, + succeeded(Always()), + ) + + d = write_snapshot_to_tahoe(parents[1], self.alice, self.tahoe_client) + self.assertThat( + d, + succeeded( + MatchesStructure( + # XXX check signature, ... +# name=Equals(snapshots[0].name), + capability=AfterPreprocessing( + self._download_content, + succeeded(Equals(data2.getvalue())), + ) + ), + ), + ) + + + @given( + content=binary(min_size=1), + filename=magic_folder_filenames(), + ) + def test_snapshot_roundtrip(self, content, filename): + """ + Create a local snapshot, write into tahoe to create a remote snapshot, + then read back the data from the snapshot cap to recreate the remote + snapshot and check if it is the same as the previous one. + """ + data = io.BytesIO(content) + + snapshots = [] + # create LocalSnapshot + d = create_snapshot( + name=filename, + author=self.alice, + data_producer=data, + snapshot_stash_dir=self.stash_dir, + parents=[], + ) + d.addCallback(snapshots.append) + self.assertThat( + d, + succeeded(Always()), + ) + + # create remote snapshot + d = write_snapshot_to_tahoe(snapshots[0], self.alice, self.tahoe_client) + d.addCallback(snapshots.append) + + # snapshots[1] is a RemoteSnapshot + # print("remote snapshot: {}".format(snapshots[1])) + + # now, recreate remote snapshot from the cap string and compare with the original. + # Check whether information is preserved across these changes. + + snapshot_d = create_snapshot_from_capability(snapshots[1].capability, self.tahoe_client) + self.assertThat(snapshot_d, succeeded(Always())) + snapshot = snapshot_d.result + + self.assertThat(snapshot, MatchesStructure(name=Equals(filename))) + content_io = io.BytesIO() + snapshot.fetch_content(self.tahoe_client, content_io) + self.assertEqual(content_io.getvalue(), content) + + @given( + content1=binary(min_size=1), + content2=binary(min_size=1), + filename=magic_folder_filenames(), + ) + def test_snapshots_with_parents(self, content1, content2, filename): + """ + Create a local snapshot, commit it to the grid, then extend that + with another local snapshot and again commit it with the previously + created remote snapshot as the parent. 
Now, fetch the remote from the + capability string and compare parent to see if they match. + """ + data1 = io.BytesIO(content1) + local_snapshots = [] + remote_snapshots = [] + + # create a local snapshot and commit it to the grid + d = create_snapshot( + name=filename, + author=self.alice, + data_producer=data1, + snapshot_stash_dir=self.stash_dir, + parents=[], + ) + d.addCallback(local_snapshots.append) + self.assertThat( + d, + succeeded(Always()), + ) + + # commit to grid + d = write_snapshot_to_tahoe(local_snapshots[0], self.alice, self.tahoe_client) + d.addCallback(remote_snapshots.append) + + # now modify the same file and create a new local snapshot + # with the last committed remote as parent + data2 = io.BytesIO(content2) + d = create_snapshot( + name=filename, + author=self.alice, + data_producer=data2, + snapshot_stash_dir=self.stash_dir, + parents=remote_snapshots, + ) + + d.addCallback(local_snapshots.append) + self.assertThat( + d, + succeeded(Always()), + ) + + d = write_snapshot_to_tahoe(local_snapshots[1], self.alice, self.tahoe_client) + d.addCallback(remote_snapshots.append) + + # now if we fetch the tip remote snapshot, it should have the previous + # remote snapshot as its parent + + parents_matcher = MatchesStructure(parents_raw=Equals([remote_snapshots[0].capability])) + self.assertThat( + create_snapshot_from_capability(remote_snapshots[1].capability, self.tahoe_client), + succeeded( + parents_matcher + ) + ) + + def test_snapshot_invalid_signature(self): + """ + Hand-create a snapshot in the grid with an invalid signature, + verifying that we fail to read this snapshot out of the grid. + """ + content = (b"fake content\n" * 20) + #content_cap = yield self.tahoe_client.create_immutable(content) + content_cap = self.root.add_data("URI:CHK:", content) + + author_cap = self.root.add_data( + "URI:CHK:", + json.dumps(self.alice.to_remote_author().to_json()) + ) + + bad_sig = base64.b64encode(b"0" * 32) + + # create remote snapshot, but with a bogus signature + data = { + "content": [ + "filenode", { + "ro_uri": content_cap, + "metadata": { + "magic_folder": { + "snapshot_version": 1, + "name": "a_file", + "author_signature": bad_sig, + } + }, + }, + ], + "author": [ + "filenode", { + "ro_uri": author_cap, + "metadata": { + "ctime": 1202777696.7564139, + "mtime": 1202777696.7564139, + "tahoe": { + "linkcrtime": 1202777696.7564139, + "linkmotime": 1202777696.7564139 + } + } + } + ], + } + snapshot_cap = self.root.add_data("URI:DIR2-CHK:", json.dumps(data)) + + snapshot_d = create_snapshot_from_capability(snapshot_cap, self.tahoe_client) + self.assertThat( + snapshot_d, + failed( + AfterPreprocessing( + lambda f: f.value, + IsInstance(BadSignatureError) + ) + ) + ) + + def test_serialize_snapshot_author(self): + """ + Write and then read a LocalAuthor to our node-directory + """ + magic_dir = FilePath(mktemp()) + node = self.useFixture(NodeDirectory(FilePath(mktemp()))) + node.create_magic_folder( + u"default", + u"URI:CHK2:{}:{}:1:1:256".format(u"a"*16, u"a"*32), + u"URI:CHK2:{}:{}:1:1:256".format(u"b"*16, u"b"*32), + magic_dir, + 60, + ) + + config = read_config(node.path.path, "portnum") + author = create_local_author("bob") + write_local_author(author, "default", config) + + # read back the author + bob = create_local_author_from_config(config) + self.assertThat( + bob, + MatchesStructure( + name=Equals("bob"), + verify_key=Equals(author.verify_key), + ) + ) diff --git a/src/magic_folder/util/eliotutil.py b/src/magic_folder/util/eliotutil.py index 
7e57df6cb..3e36a2cc0 100644 --- a/src/magic_folder/util/eliotutil.py +++ b/src/magic_folder/util/eliotutil.py @@ -109,6 +109,12 @@ def validator(v): u"(XXX probably not really, don't trust this) The timestamp of the last download of this file.", ) +LATEST_SNAPSHOT = Field.for_types( + u"latest_snapshot", + [unicode, bytes, None], + u"The filecap to which this version of this file was uploaded.", +) + PATHINFO = Field( u"pathinfo", lambda v: None if v is None else {