diff --git a/docs/.gitignore b/docs/.gitignore index 05b595d74c..23592ba08e 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -1,4 +1,5 @@ -openfl* -models* +# openfl* +# models* +# data* /_build **/.ipynb_checkpoints \ No newline at end of file diff --git a/docs/advanced_topics.rst b/docs/advanced_topics.rst index d5377c6d68..a8635563dc 100644 --- a/docs/advanced_topics.rst +++ b/docs/advanced_topics.rst @@ -15,4 +15,3 @@ Advanced Topics overriding_agg_fn bash_autocomplete_activation log_metric_callback - data_splitting diff --git a/docs/bash_autocomplete_activation.rst b/docs/bash_autocomplete_activation.rst index a82c7f0e9a..8a64ffc582 100644 --- a/docs/bash_autocomplete_activation.rst +++ b/docs/bash_autocomplete_activation.rst @@ -14,18 +14,23 @@ If not use the instruction :ref:`install_initial_steps`. Create ~/.fx-autocomplete.sh script ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + This step need to be done only one time when you don't have `~/.fx-autocomplete.sh` or `~/.fx-autocomplete.sh` have corrupted content. + .. code-block:: console $ _FX_COMPLETE=bash_source fx > ~/.fx-autocomplete.sh Check that command was executed correctly. + .. code-block:: console $ cat ~/.fx-autocomplete.sh Console output should look like example below (Click==8.0.1), but could be different depend on `Click https://click.palletsprojects.com/en/8.0.x/`_ version: + .. code-block:: console + _fx_completion() { local IFS=$'\n' local response @@ -57,15 +62,20 @@ Create ~/.fx-autocomplete.sh script Activate autocomplete feature ~~~~~~~~~~~~~~~~~~~~~ + This step should be done every time when you open a new terminal window. .. code-block:: console + $ source ~/.fx-autocomplete.sh Auto activation autocomplete ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + To save your time you can add autocomplete activation step to `~/.bashrc`. + .. code-block:: bash . ~/.fx-autocomplete.sh + Save `~/.bashrc`. Open new terminal to use updated `~/.bashrc`. 
diff --git a/docs/manual.rst b/docs/manual.rst index 50152d384c..49fff61cc1 100644 --- a/docs/manual.rst +++ b/docs/manual.rst @@ -6,10 +6,11 @@ Manual ****** .. toctree:: - :maxdepth: 4 + :maxdepth: 2 overview install running_the_federation plan_settings + source/utilities/utilities advanced_topics diff --git a/docs/openfl.rst b/docs/openfl.rst new file mode 100644 index 0000000000..40e1142ee0 --- /dev/null +++ b/docs/openfl.rst @@ -0,0 +1,13 @@ +.. # Copyright (C) 2020-2021 Intel Corporation +.. # SPDX-License-Identifier: Apache-2.0 + +****** +OpenFL structure +****** + +.. toctree:: + :maxdepth: 4 + + source/openfl/components + .. source/openfl/communication + source/openfl/plugins \ No newline at end of file diff --git a/docs/running_the_federation.interactive_api.rst b/docs/running_the_federation.interactive_api.rst deleted file mode 100644 index 00c1c7d226..0000000000 --- a/docs/running_the_federation.interactive_api.rst +++ /dev/null @@ -1,228 +0,0 @@ -.. # Copyright (C) 2020-2021 Intel Corporation -.. # SPDX-License-Identifier: Apache-2.0 - -.. _interactive_api: - -######################################################### -Experimental: |productName| Interactive Python API -######################################################### - -********************************* -Python Interactive API Concepts -********************************* - -Workspace -========== -To initialize the workspace, create an empty folder and a Jupyter notebook (or a Python script) inside it. Root folder of the notebook will be considered as the workspace. -If some objects are imported in the notebook from local modules, source code should be kept inside the workspace. -If one decides to keep local test data inside the workspace, :code:`data` folder should be used as it will not be exported. -If one decides to keep certificates inside the workspace, :code:`cert` folder should be used as it will not be exported. 
-Only relevant source code or resources should be kept inside the workspace, since it will be zipped and transferred to collaborator machines. - -Python Environment -=================== -Create a virtual Python environment. Please, install only packages that are required for conducting the experiment, since Python environment will be replicated on collaborator nodes. - -****************************************** -Certification -****************************************** -If you have trusted workspace and connection should not be encrypted you can use :code:`disable_tls` option while starting experiment. -Otherwise it is necessary to certify each node participating in the federation. Certificates allow to use mutual tls connection between nodes. -You can certify nodes by your own pki system or use pki provided by OpenFL. It is based on `step-ca `_ -as a server and `step `_ as a client utilities. They are downloaded from github during workspace setup. Regardless of the certification method, -paths to certificates on each node are provided at start of experiment. Pki workflow from OpenFL will be discussed below. - -OpenFL PKI workflow -=================== -Openfl PKI pipeline asumes creating local CA with https server which listen signing requests. -Certificates from each node can be signed by requesting to CA server with special token. -Token must be copied to each node by some secure way. Each step is considered in detail below. - -1. Create CA, i.e create root key pair, CA server config and other. - .. code-block:: console - - $ fx pki install -p --ca-url - | :code:`-p` - path to folder, which will contain ca files. - | :code:`--ca-url` - host and port which ca server will listen - When executing this command, you will be prompted for a password and password confirmation. The password will encrypt some ca files. - This command will also download `step-ca `_ and `step `_ binaries from github. - -2. Run CA https server. - .. 
code-block:: console - - $ fx pki run -p - | :code:`-p` - path to folder, which will contain ca files. - -3. Get token for some node. - - .. code-block:: console - - $ fx pki get-token -n - | :code:`-n` - subject name, fqdn for director, collaborator name for envoy or api name for api-layer node - - Run this command on ca side, from ca folder. Output is a token which contains JWT (json web token) from CA server and CA - root certificate concatenated together. This JWT have twenty-four hours time-to-live. - -4. Copy token to node side (director or envoy) by some secure channel and run certify command. - .. code-block:: console - - $ fx pki certify -n -t - | :code:`-n` - subject name, fqdn for director, collaborator name for envoy or api name for api-layer node - | :code:`-t` - output token from previous command - This command call step client, to connect to CA server over https. - Https is provided by root certificate which was copy with JWT. - Server authenticates client by JWT and client authenticates server by root certificate. - -Now signed certificate and private key are stored on current node. Signed certificate has one year time-to-live. You should certify all node that will participate in federation: director, all envoys and api-layer node. - -****************************************** -Defining a Federated Learning Experiment -****************************************** -Interactive API allows setting up an experiment from a single entrypoint - a Jupyter notebook or a Python script. -Defining an experiment includes setting up several interface entities and experiment parameters. - -Federation API -=================== -*Federation* entity is introduced to register and keep information about collaborators settings and their local data, as well as network settings to enable communication inside the federation. 
-Each federation is bound to some Machine Learning problem in a sense that all collaborators dataset shards should follow the same annotation format for all samples. Once you created a federation, it may be used in several subsequent experiments. - -To set up a federation, use Federation Interactive API. - -.. code-block:: python - - from openfl.interface.interactive_api.federation import Federation - -Federation API class should be initialized with the aggregator node FQDN and encryption settings. Someone may disable mTLS in trusted environments or provide paths to the certificate chain of CA, aggregator certificate and private key to enable mTLS. - -.. code-block:: python - - federation = Federation(central_node_fqdn: str, tls: bool, cert_chain: str, agg_certificate: str, agg_private_key: str) - -Federation's :code:`register_collaborators` method should be used to provide an information about collaborators participating in a federation. -It requires a dictionary object - :code:`{collaborator name : local data path}`. - -Experiment API -=================== - -*Experiment* entity allows registering training related objects, FL tasks and settings. -To set up an FL experiment someone should use the Experiment interactive API. - -.. code-block:: python - - from openfl.interface.interactive_api.experiment import FLExperiment - -*Experiment* is being initialized by taking federation as a parameter. - -.. code-block:: python - - fl_experiment = FLExperiment(federation=federation) - -To start an experiment user must register *DataLoader*, *Federated Learning tasks* and *Model* with *Optimizer*. There are several supplementary interface classes for these purposes. - -.. code-block:: python - - from openfl.interface.interactive_api.experiment import TaskInterface, DataInterface, ModelInterface - -Registering model and optimizer --------------------------------- - -First, user instantiate and initilize a model and optimizer in their favorite Deep Learning framework. 
Please, note that for now interactive API supports only *Keras* and *PyTorch* off-the-shelf. -Initialized model and optimizer objects then should be passed to the :code:`ModelInterface` along with the path to correct Framework Adapter plugin inside OpenFL package. If desired DL framework is not covered by existing plugins, someone can implement the plugin's interface and point :code:`framework_plugin` to the implementation inside the workspace. - -.. code-block:: python - - from openfl.interface.interactive_api.experiment import ModelInterface - MI = ModelInterface(model=model_unet, optimizer=optimizer_adam, framework_plugin=framework_adapter) - -Registering FL tasks ---------------------- - -We have an agreement on what we consider to be a FL task. -Interactive API currently allows registering only standalone functions defined in the main module or imported from other modules inside the workspace. -We also have requirements on task signature. Task should accept the following objects: - -1. model - will be rebuilt with relevant weights for every task by `TaskRunner` -2. :code:`data_loader` - data loader that will provide local data -3. device - a device to be used for execution on collaborator machines -4. optimizer (optional) - model optimizer, only for training tasks - -Moreover FL tasks should return a dictionary object with metrics :code:`{metric name: metric value for this task}`. - -:code:`Task Interface` class is designed to register task and accompanying information. -This class must be instantiated, then it's special methods may be used to register tasks. - -.. code-block:: python - - TI = TaskInterface() - - task_settings = { - 'batch_size': 32, - 'some_arg': 228, - } - @TI.add_kwargs(**task_settings) - @TI.register_fl_task(model='my_model', data_loader='train_loader', - device='device', optimizer='my_Adam_opt') - def foo(my_model, train_loader, my_Adam_opt, device, batch_size, some_arg=356) - ... 
- - -:code:`@TI.register_fl_task()` needs tasks argument names for (model, data_loader, device, optimizer (optional)) that constitute tasks 'contract'. -It adds the callable and the task contract to the task registry. - -:code:`@TI.add_kwargs()` method should be used to set up those arguments that are not included in the contract. - -Registering Federated DataLoader ---------------------------------- - -:code:`DataInterface` is provided to support a remote DataLoader initialization. - -It is initialized with User Dataset class object and all the keyword arguments can be used by dataloaders during training or validation. -User must subclass :code:`DataInterface` and implements several methods. - -* :code:`_delayed_init(self, data_path)` is the most important method. It will be called during collaborator initialization procedure with relevant :code:`data_path` (one that corresponds to the collaborator name that user registered in federation). User Dataset class should be instantiated with local :code:`data_path` here. If dataset initalization procedure differs for some of the collaborators, the initialization logic must be described here. Dataset sharding procedure for test runs should also be described in this method. User is free to save objects in class fields for later use. -* :code:`get_train_loader(self, **kwargs)` will be called before training tasks execution. This method must return anything user expects to recieve in the training task with :code:`data_loader` contract argument. :code:`kwargs` dict holds the same information that was provided during :code:`DataInterface` initialization. -* :code:`get_valid_loader(self, **kwargs)` - see the point above only with validation data -* :code:`get_train_data_size(self)` - return number of samples in local train dataset. -* :code:`get_valid_data_size(self)` - return number of samples in local validation dataset. 
- -Preparing workspace distribution ---------------------------------- -Now we may use :code:`Experiment` API to prepare a workspace archive for transferring to collaborator's node. In order to run a collaborator, we want to replicate the workspace and the Python environment. - -Instances of interface classes :code:`(TaskInterface, DataInterface, ModelInterface)` must be passed to :code:`FLExperiment.prepare_workspace_distribution()` method along with other parameters. - -This method: - -* Compiles all provided setings to a Plan object. This is the central place where all actors in federation look up their parameters. -* Saves plan.yaml to the :code:`plan/` folder inside the workspace. -* Serializes interface objects on the disk. -* Prepares :code:`requirements.txt` for remote Python environment setup. -* Compressess the workspace to an archive so it can be coppied to collaborator nodes. - -Starting the aggregator ---------------------------- - -As all previous steps done, the experiment is ready to start -:code:`FLExperiment.start_experiment()` method requires :code:`model_interface` object with initialized weights. - -It starts a local aggregator that will wait for collaborators to connect. - -Starting collaborators -======================= - -The process of starting collaborators has not changed. -User must transfer the workspace archive to a remote node and type in console: - -.. code-block:: python - - fx workspace import --archive ws.zip - -Please, note that aggregator and all the collaborator nodes should have the same Python interpreter version as the machine used for defining the experiment. - -then cd to the workspace and run - -.. code-block:: python - - fx collaborator start -d data.yaml -n one - -For more details, please refer to the TaskRunner API section. 
\ No newline at end of file diff --git a/docs/running_the_federation.rst b/docs/running_the_federation.rst index 8af954978c..ef028a7fb2 100644 --- a/docs/running_the_federation.rst +++ b/docs/running_the_federation.rst @@ -16,11 +16,7 @@ First make sure you've installed the software :ref:`using these instructions ` plugins. |productName| ships with Pytorch and Tensorflow 2.x framework adapters. +These framework adapters are intended to be extensible, +and we encourage users to contribute new adapters for DL frameworks they would like to see supported in |productName|. + +Model is loaded with relevant weights before every task and at the end of the training task, weights are extracted to be sent to the central node and aggregated. +*Collaborator* instance is created by *Envoy* (described below) when a new experiment is submitted. +Every *Collaborator* is a unique service as it is loaded with a local *Shard Descriptor* to perform tasks included in an FL experiment. + +.. _openfl_ll_components: + +Long-living components +############# + +Director +========== + +*Director* is a long-living entity; it is a central node of the federation and may take in several experiments +(with the same data interface). When an experiment is reported director starts an aggregator and sends +the experiment data to involved envoys; during the experiment, Director oversees the aggregator and updates +the user on the status of the experiment. +*Director* runs two services: one for frontend users and another one for envoys. It can distribute an experiment +reported with the frontend API across the federation and communicate back a trained model snapshot and metrics. +*Director* supports several concurrent frontend connections (yet experiments are run one by one). +To learn more about using the |productName| frontend Python API, please refer to :ref:`interactive_api` + + +Envoy +========= + +|productName| comes with another long-existing actor called *Envoy*. 
It runs on collaborator machines connected to a *Director*. +There is one to one mapping between *Envoys* and Dataset shards: every *Envoy* needs exactly one +`Shard Descriptor `_ to run. +When the *Director* starts an experiment, *Envoy* will accept the experiment workspace, prepare the environment and start a *Collaborator*. + + +Static Diagram +############# + +.. figure:: static_diagram.svg \ No newline at end of file diff --git a/docs/source/openfl/interface.rst b/docs/source/openfl/interface.rst new file mode 100644 index 0000000000..959f76aeb3 --- /dev/null +++ b/docs/source/openfl/interface.rst @@ -0,0 +1,12 @@ +.. # Copyright (C) 2020-2021 Intel Corporation +.. # SPDX-License-Identifier: Apache-2.0 + +****** +|productName| plugins +****** + +.. toctree:: + :maxdepth: 2 + + `...`_ + `...`_ \ No newline at end of file diff --git a/docs/source/openfl/plugins.rst b/docs/source/openfl/plugins.rst new file mode 100644 index 0000000000..2bc06bac45 --- /dev/null +++ b/docs/source/openfl/plugins.rst @@ -0,0 +1,73 @@ +.. # Copyright (C) 2020-2021 Intel Corporation +.. # SPDX-License-Identifier: Apache-2.0 + +****** +|productName| Plugin Components +****** + +.. toctree:: + :maxdepth: 2 + + framework_adapter_ + serializer_plugin_ + + +|productName| is designed to be a flexible and extensible framework. Plugins are interchangeable parts of +|productName| components. Different plugins support varying usage scenarios. |productName| users are free to provide +their implementations of |productName| plugins to support desired behavior. + +.. _framework_adapter: + +Framework Adapter +###################### + +Framework Adapter plugins enable |productName| support for Deep Learning frameworks usage in FL experiments. +All the framework-specific operations on model weights are isolated in this plugin so |productName| can be framework-agnostic. 
+The Framework adapter plugin interface is simple: there are two required methods to load and extract tensors from +a model and an optimizer. + +:code:`get_tensor_dict` method accepts a model and optionally an optimizer. It should return a dictionary :code:`{tensor_name : ndarray}` +that maps tensor names to tensors in the NumPy representation. + +.. code-block:: python + + @staticmethod + def get_tensor_dict(model, optimizer=None) -> dict: + +:code:`set_tensor_dict` method accepts a tensor dictionary, a model, and optionally an optimizer. It loads weights from the tensor dictionary +to the model in place. Tensor names in the dictionary match corresponding names set in :code:`get_tensor_dict` + +.. code-block:: python + + @staticmethod + def set_tensor_dict(model, tensor_dict, optimizer=None, device='cpu') -> None: + +If your new framework model cannot be directly serialized with pickle-type libraries, you can optionally +implement the :code:`serialization_setup` method to prepare the model object for serialization. + +.. code-block:: python + + def serialization_setup(): + + +.. _serializer_plugin: + +Experiment Serializer +###################### + +Serializer plugins are used on the Frontend API to serialize the Experiment components and then on Envoys to deserialize them back. +Currently, the default serializer is based on pickling. + +A Serializer plugin must implement :code:`serialize` method that creates a python object's representation on disk. + +.. code-block:: python + + @staticmethod + def serialize(object_, filename: str) -> None: + +As well as :code:`restore_object` that will load previously serialized object from disk. + +.. 
code-block:: python + + @staticmethod + def restore_object(filename: str): diff --git a/docs/source/openfl/static_diagram.svg b/docs/source/openfl/static_diagram.svg new file mode 100644 index 0000000000..8f816d4e88 --- /dev/null +++ b/docs/source/openfl/static_diagram.svg @@ -0,0 +1 @@ +Friday, 27 August 2021, 16:25 Moscow Standard TimeContainer diagram for OpenFLOpenFL[Software System]Central node-Collaborator node-Data scientist[Person]A person or group of peopleusing OpenFLEnvoy[Container]A long-living entity that can adapt alocal data set and spawncollaborators+Collaborator manager[Person]Data owner's representativecontrolling EnvoyDirector manager[Person]-Collaborator[Container]Actor executing tasks on local datainside one experiment+Python API component[Container]A set of tools to setup register FLExperiments+Director[Container]A long-living entity that can spawnaggregators-Aggregator[Container]Model server and collaboratororchestrator-Launches. Setsup globalFederationsettings--Remove link.Link options.Launches.Provides localdatasetShardDescriptors--Remove link.Link options.Sends locallytuned tensorsand trainingmetrics--Remove vertex.Remove link.Link options.Provides FL Plans,Tasks, Models,DataLoaders--Remove link.Link options.Sends tasks andinitial tensors--Remove vertex.Remove link.Link options.Approves, SendsFL experiments--Remove vertex.Remove link.Link options.Communicatesdataset info,Sends statusupdates--Remove vertex.Remove link.Link options.Creates aninstance tomaintain an FLexperiment--Remove link.Link options.Creates aninstance tomaintain an FLexperiment--Remove link.Link options.Sendsinformationabout theFederation.Returns trainingartifacts.--Remove vertex.Remove link.Link options.Registers FLexperiments--Remove vertex.Remove link.Link options. 
\ No newline at end of file diff --git a/docs/source/utilities/pki.rst b/docs/source/utilities/pki.rst new file mode 100644 index 0000000000..933296b5b2 --- /dev/null +++ b/docs/source/utilities/pki.rst @@ -0,0 +1,78 @@ +.. # Copyright (C) 2020-2021 Intel Corporation +.. # SPDX-License-Identifier: Apache-2.0 + +============================ +|productName| PKI solutions +============================ + +.. toctree:: + :maxdepth: 2 + + semi_automatic_certification_ + manual_certification_ + +.. _semi_automatic_certification: + + +Certification of Actors in Federation with Semi-automatic PKI +****************************************** + +If you have a trusted workspace and the connection does not need to be encrypted, you can use the :code:`disable_tls` option when starting an experiment. +Otherwise, it is necessary to certify each node participating in the federation. Certificates allow the nodes to use mutual TLS connections. +You can certify nodes with your own PKI system or use the PKI provided by OpenFL. It is based on `step-ca `_ +as a server and `step `_ as a client utility. They are downloaded from GitHub during workspace setup. Regardless of the certification method, +paths to certificates on each node are provided at the start of an experiment. The PKI workflow provided by OpenFL is discussed below. + +OpenFL PKI workflow +=================== +The OpenFL PKI pipeline assumes creating a local CA with an HTTPS server that listens for signing requests. +Certificates for each node can be signed by sending a request to the CA server with a special token. +The token must be copied to each node in a secure way. Each step is considered in detail below. + +1. Create the CA, i.e. create the root key pair, the CA server config, etc. + .. code-block:: console + + $ fx pki install -p --ca-url + | :code:`-p` - path to the folder that will contain the CA files. + | :code:`--ca-url` - host and port on which the CA server will listen. + When executing this command, you will be prompted for a password and password confirmation. 
The password will encrypt some CA files. + This command will also download `step-ca `_ and `step `_ binaries from GitHub. + +2. Run the CA HTTPS server. + .. code-block:: console + + $ fx pki run -p + | :code:`-p` - path to the folder that will contain the CA files. + +3. Get a token for a node. + + .. code-block:: console + + $ fx pki get-token -n + | :code:`-n` - subject name: FQDN for the director, collaborator name for an envoy, or API name for the api-layer node + + Run this command on the CA side, from the CA folder. The output is a token that contains a JWT (JSON Web Token) from the CA server and the CA + root certificate concatenated together. This JWT has a time-to-live of twenty-four hours. + +4. Copy the token to the node side (director or envoy) through a secure channel and run the certify command. + .. code-block:: console + + $ fx pki certify -n -t + | :code:`-n` - subject name: FQDN for the director, collaborator name for an envoy, or API name for the api-layer node + | :code:`-t` - output token from the previous command + This command calls the step client to connect to the CA server over HTTPS. + HTTPS is provided by the root certificate that was copied together with the JWT. + The server authenticates the client by the JWT, and the client authenticates the server by the root certificate. + +Now the signed certificate and private key are stored on the current node. The signed certificate has a time-to-live of one year. You should certify all nodes that will participate in the federation: the director, all envoys, and the api-layer node. + + + +.. _manual_certification: + + +Manual PKI +************ + +This solution is embedded into the Aggregator-based |productName| workflow. +Please refer to the :ref:`instruction_manual_certs` section. \ No newline at end of file diff --git a/docs/data_splitting.rst b/docs/source/utilities/splitters_data.rst similarity index 64% rename from docs/data_splitting.rst rename to docs/source/utilities/splitters_data.rst index 33f0b326ef..d436de3c7e 100644 --- a/docs/data_splitting.rst +++ b/docs/source/utilities/splitters_data.rst @@ -2,20 +2,20 @@ .. 
# SPDX-License-Identifier: Apache-2.0 .. _data_splitting: -=============================== -Specifying custom data splits -=============================== -------------------------------- -Usage -------------------------------- -|productName| allows developers to use custom data splits **for single-node simulation**. -In order to do this, you should: +************************************ +Dataset Splitters +************************************ -Python API -========== -Choose from predefined |productName| aggregation functions: +|productName| allows developers to specify custom data splits **for simulation runs on a single dataset**. + +You may apply data splitters differently depending on the |productName| workflow that you follow. + +Native Python API +================== + +Choose from predefined |productName| data splitter functions: - ``openfl.plugins.data_splitters.EqualNumPyDataSplitter`` (default) - ``openfl.plugins.data_splitters.RandomNumPyDataSplitter`` @@ -25,10 +25,10 @@ Or create an implementation of :class:`openfl.plugins.data_splitters.NumPyDataSp and pass it to FederatedDataset constructor as either ``train_splitter`` or ``valid_splitter`` keyword argument. -CLI -==== +Using in Shard Descriptor +================== -Choose from predefined |productName| aggregation functions: +Choose from predefined |productName| data splitter functions: - ``openfl.plugins.data_splitters.EqualNumPyDataSplitter`` (default) - ``openfl.plugins.data_splitters.RandomNumPyDataSplitter`` @@ -41,17 +41,19 @@ After defining the splitting behavior, you need to use it on your data to perfor This function receives ``data`` - NumPy array required to build the subsets of data indices (see definition of :meth:`openfl.plugins.data_splitters.NumPyDataSplitter.split`). It could be the whole dataset, or labels only, or anything else. ``split`` function returns a list of lists of indices which represent the collaborator-wise indices groups. - .. 
code-block:: python - X_train, y_train = ... # train set - X_valid, y_valid = ... # valid set - train_splitter = RandomNumPyDataSplitter() - valid_splitter = RandomNumPyDataSplitter() - # collaborator_count value is passed to DataLoader constructor - # shard_num can be evaluated from data_path - train_idx = train_splitter.split(y_train, collaborator_count)[shard_num] - valid_idx = valid_splitter.split(y_valid, collaborator_count)[shard_num] - X_train_shard = X_train[train_idx] - X_valid_shard = X_valid[valid_idx] +.. code-block:: python + + X_train, y_train = ... # train set + X_valid, y_valid = ... # valid set + train_splitter = RandomNumPyDataSplitter() + valid_splitter = RandomNumPyDataSplitter() + # collaborator_count value is passed to DataLoader constructor + # shard_num can be evaluated from data_path + train_idx = train_splitter.split(y_train, collaborator_count)[shard_num] + valid_idx = valid_splitter.split(y_valid, collaborator_count)[shard_num] + X_train_shard = X_train[train_idx] + X_valid_shard = X_valid[valid_idx] .. note:: + By default, we shuffle the data and perform equal split (see :class:`openfl.plugins.data_splitters.EqualNumPyDataSplitter`). diff --git a/docs/source/utilities/utilities.rst b/docs/source/utilities/utilities.rst new file mode 100644 index 0000000000..3d5047de9d --- /dev/null +++ b/docs/source/utilities/utilities.rst @@ -0,0 +1,12 @@ +.. # Copyright (C) 2020-2021 Intel Corporation +.. # SPDX-License-Identifier: Apache-2.0 + +****** +|productName| Utilities +****** + +.. toctree:: + :maxdepth: 2 + + pki + splitters_data \ No newline at end of file diff --git a/docs/source/workflow/director_based_workflow.rst b/docs/source/workflow/director_based_workflow.rst new file mode 100644 index 0000000000..3b7c2cd098 --- /dev/null +++ b/docs/source/workflow/director_based_workflow.rst @@ -0,0 +1,363 @@ +.. # Copyright (C) 2020 Intel Corporation +.. 
# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. + +.. _director_workflow: + +************************ +Director-based workflow +************************ + +.. toctree:: + :maxdepth: 2 + + establishing_federation_director_ + interactive_api_ + + +.. _establishing_federation_director: + +Establishing a long-living Federation with Director +####################################### + +1. Install |productName| +================== + +Make sure that you installed |productName| in your virtual Python environment. +If not, use the instruction :ref:`install_initial_steps`. + +2. Implement Shard Descriptors +================== + +Then the data owners need to implement `Shard Descriptors` Python classes. + +|productName| framework provides a ‘Shard descriptor’ interface that should be described on every collaborator node +to provide a unified data interface for FL experiments. Abstract “Shard descriptor” should be subclassed and +all its methods should be implemented to describe the way data samples and labels will be loaded from disk +during training. Shard descriptor is a subscriptable object that implements :code:`__getitem__()` and :code:`len()` methods +as well as several additional methods to access ‘sample shape’, ‘target shape’, and ‘shard description’ text +that may be used to identify participants during experiment definition and execution. + +3. (Optional) Create certificates using Step-CA +================== + +The use of mTLS is strongly recommended for deployments in untrusted environments to establish participant identity and +to encrypt communication. Users may either import certificates provided by their organization or utilize +:ref:`PKI ` provided by |productName|. + +4. Start Director +================== + +Director is a central component in the Federation. It should be started on a node with at least two open ports. 
+Learn more about the Director component here: :ref:`openfl_ll_components` + +Create Director workspace +------------------- + +Director requires a folder to operate in. Received experiments will be deployed in this folder. +Moreover, supplementary files like Director's config files and certificates may be stored in this folder. +One may use CLI command to create a structured workspace for Director with a default config file. + + .. code-block:: console + + $ fx director create-workspace -p director_ws + +Tune Director config +------------------- + +Director should be started from a config file. Basic config file should contain the Director's node FQDN, an open port, +and :code:`sample_shape` and :code:`target_shape` fields with string representation of the unified data interface in the Federation. +But it also may contain paths to certificates. + +Use CLI to start Director +------------------- + +When the Director's config has been set up, one may use CLI to start the Director. Without mTLS protection: + + .. code-block:: console + + $ fx director start --disable-tls -c director_config.yaml + +In the case of a certified Federation: + + .. code-block:: console + + $ fx director start -c director_config.yaml \ + -rc cert/root_ca.crt \ + -pk cert/priv.key \ + -oc cert/open.crt + +5. Start Envoys +================== + +Envoys are |productName|'s 'agents' on collaborator nodes that may receive an experiment archive and provide +access to local data. +When started Envoy will try to connect to the Director. + +Create Envoy workspace +------------------- + +The Envoy component also requires a folder to operate in. Use the following CLI command to create a workspace +with convenient folder structure and default Envoy's config and Shard Descriptor Python script: + + .. 
code-block:: console + + $ fx envoy create-workspace -p envoy_ws + +Setup Envoy's config +------------------- + +Unlike Director’s config, the one for Envoy should contain settings for the local Shard Descriptor. +The template field must be filled with the address of the local Shard Descriptor class, and settings field +should list arbitrary settings required to initialize the Shard Descriptor. + +Use CLI to start Envoy +------------------- + +To start the Envoy without mTLS use the following CLI command: + + .. code-block:: console + + $ fx envoy start -n env_one --disable-tls \ + --shard-config-path shard_config.yaml -d director_fqdn:port + +Alternatively, use the following command to establish a secured connection: + + .. code-block:: console + + $ ENVOY_NAME=envoy_example_name + + $ fx envoy start -n "$ENVOY_NAME" \ + --shard-config-path shard_config.yaml \ + -d director_fqdn:port -rc cert/root_ca.crt \ + -pk cert/"$ENVOY_NAME".key -oc cert/"$ENVOY_NAME".crt + + +6. Describing an FL experiment using Interactive Python API +==================================== + +At this point, data scientists may register their experiments to be executed in the federation. +|productName| provides a separate frontend Director’s client and :ref:`Interactive Python API <interactive_api>` +to register experiments. + + +.. _interactive_api: + +Beta: |productName| Interactive Python API +####################################### + +The |productName| Python Interactive API should help data scientists to adapt single node training code for +running in the FL manner. The process of defining an FL experiment is fully decoupled from the establishing +a Federation routine. Everything that a data scientist needs to prepare an experiment is a Python interpreter and access to the Director. + +Python Interactive API Concepts +=============================== + +Workspace +---------- +To initialize the workspace, create an empty folder and a Jupyter notebook (or a Python script) inside it. 
Root folder of the notebook will be considered as the workspace. +If some objects are imported in the notebook from local modules, source code should be kept inside the workspace. +If one decides to keep local test data inside the workspace, :code:`data` folder should be used as it will not be exported. +If one decides to keep certificates inside the workspace, :code:`cert` folder should be used as it will not be exported. +Only relevant source code or resources should be kept inside the workspace, since it will be zipped and transferred to collaborator machines. + +Python Environment +--------------------- +Create a virtual Python environment. Please, install only packages that are required for conducting the experiment, since Python environment will be replicated on collaborator nodes. + + + +Defining a Federated Learning Experiment +======================================== + +Interactive API allows to register and start an FL experiment from a single entry point - a Jupyter notebook or a Python script. +An FL experiment definition process includes setting up several interface entities and experiment parameters. + +Federation API +---------------- +*Federation* entity is introduced to register and keep information about collaborators settings and their local data, +as well as network settings to enable communication inside the federation. +Each federation is bound to some Machine Learning problem in a sense that all collaborators dataset shards should +follow the same annotation format for all samples. Once you created a federation, it may be used in several +subsequent experiments. + +To set up a federation, use Federation Interactive API. + +.. code-block:: python + + from openfl.interface.interactive_api.federation import Federation + +Federation API class should be initialized with the aggregator node FQDN and encryption settings. 
User may disable mTLS in trusted environments or provide paths to the certificate chain of CA, aggregator certificate and private key to enable mTLS. + +.. code-block:: python + + federation = Federation( + client_id: str, director_node_fqdn: str, director_port: str, + tls: bool, ca_cert_chain: str, cert: str, private_key: str) + +* Federation's :code:`get_dummy_shard_descriptor` method should be used to create a dummy Shard Descriptor that + fakes access to real data. It may be used for debugging the user's experiment pipeline. +* Federation's :code:`get_shard_registry` method returns information about the envoys connected to the Director + and their Shard Descriptors. + +Experiment API +---------------- + +*Experiment* entity allows registering training related objects, FL tasks and settings. +To set up an FL experiment someone should use the Experiment interactive API. + +.. code-block:: python + + from openfl.interface.interactive_api.experiment import FLExperiment + +*Experiment* is being initialized by taking a Federation object and the experiment name as parameters. + +.. code-block:: python + + fl_experiment = FLExperiment(federation: Federation, experiment_name: str) + +To start an experiment user must register *DataLoader*, *Federated Learning tasks* and *Model* with *Optimizer*. +There are several supplementary interface classes for these entities. + +.. code-block:: python + + from openfl.interface.interactive_api.experiment import TaskInterface, DataInterface, ModelInterface + +Registering model and optimizer +-------------------------------- + +First, user instantiates and initializes a model and optimizer in their favorite Deep Learning framework. +Please, note that for now interactive API supports only *Keras* and *PyTorch* off-the-shelf. +Initialized model and optimizer objects then should be passed to the :code:`ModelInterface` along with the +path to correct Framework Adapter plugin inside |productName| package. 
If desired DL framework is not covered by +existing plugins, user can implement the plugin's interface and point :code:`framework_plugin` to the implementation +inside the workspace. + +.. code-block:: python + + from openfl.interface.interactive_api.experiment import ModelInterface + MI = ModelInterface(model, optimizer, framework_plugin: str) + +Registering FL tasks +--------------------- + +|productName| has a specific concept of an FL task. +Interactive API currently allows registering only standalone functions defined in the main module or +imported from other modules inside the workspace. +We also have requirements on task signature. Task should accept the following objects: + +1. model - will be rebuilt with relevant weights for every task by `TaskRunner` +2. :code:`data_loader` - data loader that will provide local data +3. device - a device to be used for execution on collaborator machines +4. optimizer (optional) - model optimizer, only for training tasks + +Moreover FL tasks should return a dictionary object with metrics :code:`{metric name: metric value for this task}`. + +:code:`Task Interface` class is designed to register task and accompanying information. +This class must be instantiated, then its special methods may be used to register tasks. + +.. code-block:: python + + TI = TaskInterface() + + task_settings = { + 'batch_size': 32, + 'some_arg': 228, + } + @TI.add_kwargs(**task_settings) + @TI.register_fl_task(model='my_model', data_loader='train_loader', + device='device', optimizer='my_Adam_opt') + def foo(my_model, train_loader, my_Adam_opt, device, batch_size, some_arg=356): + ... + + +:code:`@TI.register_fl_task()` needs tasks argument names for (model, data_loader, device, optimizer (optional)) that constitute tasks 'contract'. +It adds the callable and the task contract to the task registry. + +:code:`@TI.add_kwargs()` method should be used to set up those arguments that are not included in the contract. 
+ +Registering Federated DataLoader +--------------------------------- + +:code:`DataInterface` is provided to support seamless remote data adaption. + +As the *Shard Descriptor's* responsibilities are reading and formatting the local data, the *DataLoader* is expected to +contain batching and augmenting data logic, common for all collaborators. + +User must subclass :code:`DataInterface` and implement the following methods: + +.. code-block:: python + + class CustomDataLoader(DataInterface): + def __init__(self, **kwargs): + # Initialize superclass with kwargs: this array will be passed + # to get_data_loader methods + super().__init__(**kwargs) + # Set up augmentation, save required parameters, + # use it as your regular dataset class + validation_fraction = kwargs.get('validation_fraction', 0.5) + ... + + @property + def shard_descriptor(self): + return self._shard_descriptor + + @shard_descriptor.setter + def shard_descriptor(self, shard_descriptor): + self._shard_descriptor = shard_descriptor + # You can implement data splitting logic here + # Or update your data set according to local Shard Descriptor attributes if required + + def get_train_loader(self, **kwargs): + # these are the same kwargs you provided to __init__, + # But passed on a collaborator machine + bs = kwargs.get('train_batch_size', 32) + return foo_loader() + + # so on, see the full list of methods below + +* Shard Descriptor setter and getter methods: + :code:`shard_descriptor(self, shard_descriptor)` setter is the most important method. It will be called during the *Collaborator* + initialization procedure with the local Shard Descriptor. Any logic that is triggered with the Shard Descriptor replacement + must be also put here. +* :code:`get_train_loader(self, **kwargs)` will be called before training tasks execution. This method must return anything the user expects to receive in the training task with :code:`data_loader` contract argument. 
:code:`kwargs` dict holds the same information that was provided during :code:`DataInterface` initialization. +* :code:`get_valid_loader(self, **kwargs)` - see the point above (just replace training with validation) +* :code:`get_train_data_size(self)` - return number of samples in local train dataset. Use the information provided by Shard Descriptor, take into account your train / validation split. +* :code:`get_valid_data_size(self)` - return number of samples in local validation dataset. + +User Dataset class should be instantiated to pass further to the *Experiment* object. Dummy *Shard Descriptor* +(or a custom local one) may be set up to test the augmentation or batching pipeline. +Keyword arguments used during initialization on the frontend node may be used during dataloaders construction on collaborator machines. + + +Starting an FL experiment +======================================== +Now we may use :code:`Experiment` API to prepare a workspace archive for transferring to the *Director*. In order to run *Collaborators*, we want to replicate the workspace and the Python environment +on remote machines. + +Instances of interface classes :code:`(TaskInterface, DataInterface, ModelInterface)` must be passed to :code:`FLExperiment.start()` method along with other parameters. + +This method: + +* Compiles all provided settings to a Plan object. The Plan is the central place where all actors in federation look up their parameters. +* Saves plan.yaml to the :code:`plan/` folder inside the workspace. +* Serializes interface objects on the disk. +* Prepares :code:`requirements.txt` for remote Python environment setup. +* Compresses the whole workspace to an archive. +* Sends the experiment archive to the Director so it may distribute the archive across the Federation and start the *Aggregator*. 
+ +Observing the Experiment execution +---------------------------------- + +If the Experiment was accepted by the *Director* user can oversee its execution with +:code:`FLExperiment.stream_metrics()` method that is able to print metrics from the FL tasks (and save tensorboard logs). + +When the Experiment is finished, user may retrieve trained models in the native format using :code:`FLExperiment.get_best_model()` +and :code:`FLExperiment.get_last_model()` methods. + +:code:`FLExperiment.remove_experiment_data()` allows erasing the experiment's artifacts from the Director. + +When the Experiment is finished +---------------------------------- + +Users may utilize the same Federation object to report another experiment or even schedule several experiments that +will be executed one by one. \ No newline at end of file diff --git a/docs/source/workflow/running_the_federation.agg_based.rst b/docs/source/workflow/running_the_federation.agg_based.rst new file mode 100644 index 0000000000..4797e71b2a --- /dev/null +++ b/docs/source/workflow/running_the_federation.agg_based.rst @@ -0,0 +1,25 @@ +.. # Copyright (C) 2020-2021 Intel Corporation +.. # SPDX-License-Identifier: Apache-2.0 + +.. _running_the_federation_aggregator_based: + +********************** +Aggregator-based workflow. +********************** + +First make sure you've installed the software :ref:`using these instructions <install_initial_steps>` + +.. figure:: images/openfl_flow.png + +.. centered:: 100K foot view of OpenFL workflow + +The high-level workflow is shown in the figure above. Note that once OpenFL is installed on all nodes of the federation and every member of the federation has a valid PKI certificate, all that is needed to run an instance of a federated workload is to distribute the workspace to all federation members and then run the command to start the node (e.g. :code:`fx aggregator start`/:code:`fx collaborator start`). 
In other words, most of the work is setting up an initial environment on all of the federation nodes that can be used across new instantiations of federations. + +.. toctree:: + :maxdepth: 2 + + running_the_federation.notebook + running_the_federation.baremetal + running_the_federation.docker + running_the_federation.certificates + running_the_federation.start_nodes.rst diff --git a/docs/running_the_federation.baremetal.rst b/docs/source/workflow/running_the_federation.baremetal.rst similarity index 100% rename from docs/running_the_federation.baremetal.rst rename to docs/source/workflow/running_the_federation.baremetal.rst diff --git a/docs/running_the_federation.certificates.rst b/docs/source/workflow/running_the_federation.certificates.rst similarity index 99% rename from docs/running_the_federation.certificates.rst rename to docs/source/workflow/running_the_federation.certificates.rst index 4ea49d207b..8e7638feea 100644 --- a/docs/running_the_federation.certificates.rst +++ b/docs/source/workflow/running_the_federation.certificates.rst @@ -1,6 +1,8 @@ .. # Copyright (C) 2020 Intel Corporation .. # Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. +.. _instruction_manual_certs: + ************************** Configuring the Federation ************************** @@ -25,8 +27,6 @@ workflows: manual and semi-automatic (with step-ca). :align: center :type: mermaid -.. _install_certs: - .. 
kroki:: mermaid/pki_scheme.mmd :caption: Step-ca certificate generation and signing :align: center diff --git a/docs/running_the_federation.docker.rst b/docs/source/workflow/running_the_federation.docker.rst similarity index 100% rename from docs/running_the_federation.docker.rst rename to docs/source/workflow/running_the_federation.docker.rst diff --git a/docs/running_the_federation.notebook.rst b/docs/source/workflow/running_the_federation.notebook.rst similarity index 100% rename from docs/running_the_federation.notebook.rst rename to docs/source/workflow/running_the_federation.notebook.rst diff --git a/docs/running_the_federation.singularity.rst b/docs/source/workflow/running_the_federation.singularity.rst similarity index 100% rename from docs/running_the_federation.singularity.rst rename to docs/source/workflow/running_the_federation.singularity.rst diff --git a/docs/running_the_federation.start_nodes.rst b/docs/source/workflow/running_the_federation.start_nodes.rst similarity index 100% rename from docs/running_the_federation.start_nodes.rst rename to docs/source/workflow/running_the_federation.start_nodes.rst diff --git a/docs/structurizer_dsl/structurizr-1-Containers.svg b/docs/structurizer_dsl/structurizr-1-Containers.svg new file mode 100644 index 0000000000..8f816d4e88 --- /dev/null +++ b/docs/structurizer_dsl/structurizr-1-Containers.svg @@ -0,0 +1 @@ +Friday, 27 August 2021, 16:25 Moscow Standard TimeContainer diagram for OpenFLOpenFL[Software System]Central node-Collaborator node-Data scientist[Person]A person or group of peopleusing OpenFLEnvoy[Container]A long-living entity that can adapt alocal data set and spawncollaborators+Collaborator manager[Person]Data owner's representativecontrolling EnvoyDirector manager[Person]-Collaborator[Container]Actor executing tasks on local datainside one experiment+Python API component[Container]A set of tools to setup register FLExperiments+Director[Container]A long-living entity that can 
spawnaggregators-Aggregator[Container]Model server and collaboratororchestrator-Launches. Setsup globalFederationsettings--Remove link.Link options.Launches.Provides localdatasetShardDescriptors--Remove link.Link options.Sends locallytuned tensorsand trainingmetrics--Remove vertex.Remove link.Link options.Provides FL Plans,Tasks, Models,DataLoaders--Remove link.Link options.Sends tasks andinitial tensors--Remove vertex.Remove link.Link options.Approves, SendsFL experiments--Remove vertex.Remove link.Link options.Communicatesdataset info,Sends statusupdates--Remove vertex.Remove link.Link options.Creates aninstance tomaintain an FLexperiment--Remove link.Link options.Creates aninstance tomaintain an FLexperiment--Remove link.Link options.Sendsinformationabout theFederation.Returns trainingartifacts.--Remove vertex.Remove link.Link options.Registers FLexperiments--Remove vertex.Remove link.Link options. \ No newline at end of file diff --git a/docs/structurizer_dsl/workspace.dsl b/docs/structurizer_dsl/workspace.dsl new file mode 100755 index 0000000000..bfc5de60bb --- /dev/null +++ b/docs/structurizer_dsl/workspace.dsl @@ -0,0 +1,97 @@ + +workspace "OpenFL" "An open framework for Federated Learning." 
{ + model { + group "Control" { + user = person "Data scientist" "A person or group of people using OpenFL" + shardOwner = person "Collaborator manager" "Data owner's representative controlling Envoy" + centralManager = person "Director manager" + governor = softwareSystem "Governor" "CCF-based system for corporate clients" + } + openfl = softwareSystem "OpenFL" "An open framework for Federated Learning" { + apiLayer = container "Python API component" "A set of tools to setup and register FL Experiments" { + federationInterface = component "Federaion Interface" + experimentInterface = component "Experiment Interface" + # TaskInterface = component "" + } + + group "Central node" { + director = container "Director" "A long-living entity that can spawn aggregators" + aggregator = container "Aggregator" "Model server and collaborator orchestrator"{ + assigner = component "Task Assigner" "Decides the policy for which collaborators should run FL tasks" + grpcServer = component "gRPC Server" + } + } + group "Collaborator node" { + envoy = container "Envoy" "A long-living entity that can adapt a local dataset and spawn collaborators" { + shardDescriptor = component "Shard Descriptor" "Data manager's interface aimed to unify data access" { + tags "Interface" + } + } + collaborator = container "Collaborator" "Actor executing tasks on local data inside one experiment" { + pluginManager = component "Plugin Manager" + taskRunner = component "Task Runner" + tensorDB = component "Tensor Data Base" + tensorCodec = component "TensorCodec" + grpcClient = component "gRPC Client" + frameworkAdapter = component "Framework Adapter" + } + } + } + config = element "Config file" + + # relationships between people and software systems + user -> openfl "Controls Fedarations. 
Provides FL plans, tasks, models, data" + governor -> openfl "Controls Fedarations" + + # relationships to/from containers + user -> apiLayer "Provides FL Plans, Tasks, Models, DataLoaders" + shardOwner -> envoy "Launches. Provides local dataset ShardDescriptors" + centralManager -> director "Launches. Sets up global Federation settings" + apiLayer -> director "Registers FL experiments" + director -> apiLayer "Sends information about the Federation. Returns training artifacts." + director -> aggregator "Creates an instance to maintain an FL experiment" + envoy -> collaborator "Creates an instance to maintain an FL experiment" + envoy -> director "Communicates dataset info, Sends status updates" + director -> envoy "Approves, Sends FL experiments" + aggregator -> collaborator "Sends tasks and initial tensors" + collaborator -> aggregator "Sends locally tuned tensors and training metrics" + + + # relationships to/from components + envoy -> taskRunner "Provides tasks' defenitions" + grpcClient -> taskRunner "Invokes some tasks for the round" + aggregator -> grpcClient "Communicates" + } + + views + theme default + + systemcontext openfl "SystemContext" { + include * + autoLayout + + } + + container openfl "Containers" { + include * + # include config + # autoLayout + } + + component collaborator "Collaborator" { + include * + autoLayout + } + + component apiLayer "API" { + include * + autoLayout + } + + component envoy "Envoy" { + include * + autoLayout + } + +} + diff --git a/docs/structurizer_dsl/workspace.json b/docs/structurizer_dsl/workspace.json new file mode 100644 index 0000000000..71ac98b00a --- /dev/null +++ b/docs/structurizer_dsl/workspace.json @@ -0,0 +1,509 @@ +{ + "id" : 1, + "name" : "OpenFL", + "description" : "An open framework for Federated Learning.", + "revision" : 0, + "lastModifiedDate" : "2021-08-27T13:28:56Z", + "lastModifiedAgent" : "structurizr-web/2475", + "properties" : { + "structurizr.dsl" : 
"CndvcmtzcGFjZSAiT3BlbkZMIiAiQW4gb3BlbiBmcmFtZXdvcmsgZm9yIEZlZGVyYXRlZCBMZWFybmluZy4iIHsKICAgIG1vZGVsIHsKICAgICAgICBncm91cCAiQ29udHJvbCIgewogICAgICAgICAgICB1c2VyID0gcGVyc29uICJEYXRhIHNjaWVudGlzdCIgIkEgcGVyc29uIG9yIGdyb3VwIG9mIHBlb3BsZSB1c2luZyBPcGVuRkwiCiAgICAgICAgICAgIHNoYXJkT3duZXIgPSBwZXJzb24gIkNvbGxhYm9yYXRvciBtYW5hZ2VyIiAiRGF0YSBvd25lcidzIHJlcHJlc2VudGF0aXZlIGNvbnRyb2xsaW5nIEVudm95IgogICAgICAgICAgICBjZW50cmFsTWFuYWdlciA9IHBlcnNvbiAiRGlyZWN0b3IgbWFuYWdlciIgCiAgICAgICAgICAgIGdvdmVybm9yID0gc29mdHdhcmVTeXN0ZW0gIkdvdmVybm9yIiAiQ0NGLWJhc2VkIHN5c3RlbSBmb3IgY29ycG9yYXRlIGNsaWVudHMiCiAgICAgICAgfQogICAgICAgIG9wZW5mbCA9IHNvZnR3YXJlU3lzdGVtICJPcGVuRkwiICJBbiBvcGVuIGZyYW1ld29yayBmb3IgRmVkZXJhdGVkIExlYXJuaW5nIiB7CiAgICAgICAgICAgIGFwaUxheWVyID0gY29udGFpbmVyICJQeXRob24gQVBJIGNvbXBvbmVudCIgIkEgc2V0IG9mIHRvb2xzIHRvIHNldHVwIHJlZ2lzdGVyIEZMIEV4cGVyaW1lbnRzIiB7CiAgICAgICAgICAgICAgICBmZWRlcmF0aW9uSW50ZXJmYWNlID0gY29tcG9uZW50ICJGZWRlcmFpb24gSW50ZXJmYWNlIgogICAgICAgICAgICAgICAgZXhwZXJpbWVudEludGVyZmFjZSA9IGNvbXBvbmVudCAiRXhwZXJpbWVudCBJbnRlcmZhY2UiCiAgICAgICAgICAgICAgICAjIFRhc2tJbnRlcmZhY2UgPSBjb21wb25lbnQgIiIKICAgICAgICAgICAgfQoKICAgICAgICAgICAgZ3JvdXAgIkNlbnRyYWwgbm9kZSIgewogICAgICAgICAgICAgICAgZGlyZWN0b3IgPSBjb250YWluZXIgIkRpcmVjdG9yIiAiQSBsb25nLWxpdmluZyBlbnRpdHkgdGhhdCBjYW4gc3Bhd24gYWdncmVnYXRvcnMiCiAgICAgICAgICAgICAgICBhZ2dyZWdhdG9yID0gY29udGFpbmVyICJBZ2dyZWdhdG9yIiAiTW9kZWwgc2VydmVyIGFuZCBjb2xsYWJvcmF0b3Igb3JjaGVzdHJhdG9yInsKICAgICAgICAgICAgICAgICAgICBhc3NpZ25lciA9IGNvbXBvbmVudCAiVGFzayBBc3NpZ25lciIgIkRlY2lkZXMgdGhlIHBvbGljeSBmb3Igd2hpY2ggY29sbGFib3JhdG9ycyBzaG91bGQgcnVuIEZMIHRhc2tzIgogICAgICAgICAgICAgICAgICAgIGdycGNTZXJ2ZXIgPSBjb21wb25lbnQgImdSUEMgU2VydmVyIgogICAgICAgICAgICAgICAgfQogICAgICAgICAgICB9CiAgICAgICAgICAgIGdyb3VwICJDb2xsYWJvcmF0b3Igbm9kZSIgewogICAgICAgICAgICAgICAgZW52b3kgPSBjb250YWluZXIgIkVudm95IiAiQSBsb25nLWxpdmluZyBlbnRpdHkgdGhhdCBjYW4gYWRhcHQgYSBsb2NhbCBkYXRhIHNldCBhbmQgc3Bhd24gY29sbGFib3JhdG9ycyIgewogICAgICAgICAgICAgICAgICAgIHNoYXJkRGVzY3JpcHRvciA9IGNvbXBvbmVudCA
iU2hhcmQgRGVzY3JpcHRvciIgIkRhdGEgbWFuYWdlcidzIGludGVyZmFjZSBhaW1lZCB0byB1bmlmeSBkYXRhIGFjY2VzcyIgewogICAgICAgICAgICAgICAgICAgICAgICB0YWdzICJJbnRlcmZhY2UiCiAgICAgICAgICAgICAgICAgICAgfQogICAgICAgICAgICAgICAgfQogICAgICAgICAgICAgICAgY29sbGFib3JhdG9yID0gY29udGFpbmVyICJDb2xsYWJvcmF0b3IiICJBY3RvciBleGVjdXRpbmcgdGFza3Mgb24gbG9jYWwgZGF0YSBpbnNpZGUgb25lIGV4cGVyaW1lbnQiIHsKICAgICAgICAgICAgICAgICAgICBwbHVnaW5NYW5hZ2VyID0gY29tcG9uZW50ICJQbHVnaW4gTWFuYWdlciIKICAgICAgICAgICAgICAgICAgICB0YXNrUnVubmVyID0gY29tcG9uZW50ICJUYXNrIFJ1bm5lciIKICAgICAgICAgICAgICAgICAgICB0ZW5zb3JEQiA9IGNvbXBvbmVudCAiVGVuc29yIERhdGEgQmFzZSIKICAgICAgICAgICAgICAgICAgICB0ZW5zb3JDb2RlYyA9IGNvbXBvbmVudCAiVGVuc29yQ29kZWMiCiAgICAgICAgICAgICAgICAgICAgZ3JwY0NsaWVudCA9IGNvbXBvbmVudCAiZ1JQQyBDbGllbnQiCiAgICAgICAgICAgICAgICAgICAgZnJhbWV3b3JrQWRhcHRlciA9IGNvbXBvbmVudCAiRnJhbWV3b3JrIEFkYXB0ZXIiCiAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICB9CiAgICAgICAgY29uZmlnID0gZWxlbWVudCAiQ29uZmlnIGZpbGUiCgogICAgICAgICMgcmVsYXRpb25zaGlwcyBiZXR3ZWVuIHBlb3BsZSBhbmQgc29mdHdhcmUgc3lzdGVtcwogICAgICAgIHVzZXIgLT4gb3BlbmZsICJDb250cm9scyBGZWRhcmF0aW9ucy4gUHJvdmlkZXMgRkwgcGxhbnMsIHRhc2tzLCBtb2RlbHMsIGRhdGEiCiAgICAgICAgZ292ZXJub3IgLT4gb3BlbmZsICJDb250cm9scyBGZWRhcmF0aW9ucyIKCiAgICAgICAgIyByZWxhdGlvbnNoaXBzIHRvL2Zyb20gY29udGFpbmVycwogICAgICAgIHVzZXIgLT4gYXBpTGF5ZXIgIlByb3ZpZGVzIEZMIFBsYW5zLCBUYXNrcywgTW9kZWxzLCBEYXRhTG9hZGVycyIKICAgICAgICBzaGFyZE93bmVyIC0+IGVudm95ICJMYXVuY2hlcy4gUHJvdmlkZXMgbG9jYWwgZGF0YXNldCBTaGFyZERlc2NyaXB0b3JzIgogICAgICAgIGNlbnRyYWxNYW5hZ2VyIC0+IGRpcmVjdG9yICJMYXVuY2hlcy4gU2V0cyB1cCBnbG9iYWwgRmVkZXJhdGlvbiBzZXR0aW5ncyIKICAgICAgICBhcGlMYXllciAtPiBkaXJlY3RvciAiUmVnaXN0ZXJzIEZMIGV4cGVyaW1lbnRzIgogICAgICAgIGRpcmVjdG9yIC0+IGFwaUxheWVyICJTZW5kcyBpbmZvcm1hdGlvbiBhYm91dCB0aGUgRmVkZXJhdGlvbi4gUmV0dXJucyB0cmFpbmluZyBhcnRpZmFjdHMuIgogICAgICAgIGRpcmVjdG9yIC0+IGFnZ3JlZ2F0b3IgIkNyZWF0ZXMgYW4gaW5zdGFuY2UgdG8gbWFpbnRhaW4gYW4gRkwgZXhwZXJpbWVudCIKICAgICAgICBlbnZveSAtPiBjb2xsYWJvcmF0b3IgIkNyZWF0ZXMgYW4gaW5zdGFuY2UgdG8gbWFpbnRhaW4gYW4gRkw
gZXhwZXJpbWVudCIKICAgICAgICBlbnZveSAtPiBkaXJlY3RvciAiQ29tbXVuaWNhdGVzIGRhdGFzZXQgaW5mbywgU2VuZHMgc3RhdHVzIHVwZGF0ZXMiCiAgICAgICAgZGlyZWN0b3IgLT4gZW52b3kgIkFwcHJvdmVzLCBTZW5kcyBGTCBleHBlcmltZW50cyIKICAgICAgICBhZ2dyZWdhdG9yIC0+IGNvbGxhYm9yYXRvciAiU2VuZHMgdGFza3MgYW5kIGluaXRpYWwgdGVuc29ycyIKICAgICAgICBjb2xsYWJvcmF0b3IgLT4gYWdncmVnYXRvciAiU2VuZHMgbG9jYWxseSB0dW5lZCB0ZW5zb3JzIGFuZCB0cmFpbmluZyBtZXRyaWNzIgoKCiAgICAgICAgIyByZWxhdGlvbnNoaXBzIHRvL2Zyb20gY29tcG9uZW50cwogICAgICAgIGVudm95IC0+IHRhc2tSdW5uZXIgIlByb3ZpZGVzIHRhc2tzJyBkZWZlbml0aW9ucyIKICAgICAgICBncnBjQ2xpZW50IC0+IHRhc2tSdW5uZXIgIkludm9rZXMgc29tZSB0YXNrcyBmb3IgdGhlIHJvdW5kIgogICAgICAgIGFnZ3JlZ2F0b3IgLT4gZ3JwY0NsaWVudCAiQ29tbXVuaWNhdGVzIgogICAgfQoKICAgIHZpZXdzCiAgICAgICAgdGhlbWUgZGVmYXVsdAoKICAgICAgICBzeXN0ZW1jb250ZXh0IG9wZW5mbCAiU3lzdGVtQ29udGV4dCIgewogICAgICAgICAgICBpbmNsdWRlICoKICAgICAgICAgICAgYXV0b0xheW91dAogICAgICAgICAgICAKICAgICAgICB9CgogICAgICAgIGNvbnRhaW5lciBvcGVuZmwgIkNvbnRhaW5lcnMiIHsKICAgICAgICAgICAgaW5jbHVkZSAqCiAgICAgICAgICAgICMgaW5jbHVkZSBjb25maWcKICAgICAgICAgICAgIyBhdXRvTGF5b3V0CiAgICAgICAgfQoKICAgICAgICBjb21wb25lbnQgY29sbGFib3JhdG9yICJDb2xsYWJvcmF0b3IiIHsKICAgICAgICAgICAgaW5jbHVkZSAqCiAgICAgICAgICAgIGF1dG9MYXlvdXQKICAgICAgICB9CgogICAgICAgIGNvbXBvbmVudCBhcGlMYXllciAiQVBJIiB7CiAgICAgICAgICAgIGluY2x1ZGUgKgogICAgICAgICAgICBhdXRvTGF5b3V0CiAgICAgICAgfQoKICAgICAgICBjb21wb25lbnQgZW52b3kgIkVudm95IiB7CiAgICAgICAgICAgIGluY2x1ZGUgKgogICAgICAgICAgICBhdXRvTGF5b3V0CiAgICAgICAgfQoKfQoK" + }, + "configuration" : { }, + "model" : { + "people" : [ { + "id" : "2", + "tags" : "Element,Person", + "name" : "Collaborator manager", + "description" : "Data owner's representative controlling Envoy", + "relationships" : [ { + "id" : "26", + "tags" : "Relationship", + "sourceId" : "2", + "destinationId" : "13", + "description" : "Launches. Provides local dataset ShardDescriptors" + }, { + "id" : "27", + "tags" : "Relationship", + "sourceId" : "2", + "destinationId" : "5", + "description" : "Launches. 
Provides local dataset ShardDescriptors" + } ], + "group" : "Control", + "location" : "Unspecified" + }, { + "id" : "1", + "tags" : "Element,Person", + "name" : "Data scientist", + "description" : "A person or group of people using OpenFL", + "relationships" : [ { + "id" : "25", + "tags" : "Relationship", + "sourceId" : "1", + "destinationId" : "6", + "description" : "Provides FL Plans, Tasks, Models, DataLoaders" + }, { + "id" : "23", + "tags" : "Relationship", + "sourceId" : "1", + "destinationId" : "5", + "description" : "Controls Fedarations. Provides FL plans, tasks, models, data" + } ], + "group" : "Control", + "location" : "Unspecified" + }, { + "id" : "3", + "tags" : "Element,Person", + "name" : "Director manager", + "relationships" : [ { + "id" : "29", + "tags" : "Relationship", + "sourceId" : "3", + "destinationId" : "5", + "description" : "Launches. Sets up global Federation settings" + }, { + "id" : "28", + "tags" : "Relationship", + "sourceId" : "3", + "destinationId" : "9", + "description" : "Launches. 
Sets up global Federation settings" + } ], + "group" : "Control", + "location" : "Unspecified" + } ], + "softwareSystems" : [ { + "id" : "4", + "tags" : "Element,Software System", + "name" : "Governor", + "description" : "CCF-based system for corporate clients", + "relationships" : [ { + "id" : "24", + "tags" : "Relationship", + "sourceId" : "4", + "destinationId" : "5", + "description" : "Controls Fedarations" + } ], + "group" : "Control", + "location" : "Unspecified" + }, { + "id" : "5", + "tags" : "Element,Software System", + "name" : "OpenFL", + "description" : "An open framework for Federated Learning", + "location" : "Unspecified", + "containers" : [ { + "id" : "10", + "tags" : "Element,Container", + "name" : "Aggregator", + "description" : "Model server and collaborator orchestrator", + "relationships" : [ { + "id" : "40", + "tags" : "Relationship", + "sourceId" : "10", + "destinationId" : "20", + "description" : "Communicates" + }, { + "id" : "36", + "tags" : "Relationship", + "sourceId" : "10", + "destinationId" : "15", + "description" : "Sends tasks and initial tensors" + } ], + "group" : "Central node", + "components" : [ { + "id" : "11", + "tags" : "Element,Component", + "name" : "Task Assigner", + "description" : "Decides the policy for which collaborators should run FL tasks", + "size" : 0 + }, { + "id" : "12", + "tags" : "Element,Component", + "name" : "gRPC Server", + "size" : 0 + } ] + }, { + "id" : "15", + "tags" : "Element,Container", + "name" : "Collaborator", + "description" : "Actor executing tasks on local data inside one experiment", + "relationships" : [ { + "id" : "37", + "tags" : "Relationship", + "sourceId" : "15", + "destinationId" : "10", + "description" : "Sends locally tuned tensors and training metrics" + } ], + "group" : "Collaborator node", + "components" : [ { + "id" : "16", + "tags" : "Element,Component", + "name" : "Plugin Manager", + "size" : 0 + }, { + "id" : "21", + "tags" : "Element,Component", + "name" : "Framework 
Adapter", + "size" : 0 + }, { + "id" : "18", + "tags" : "Element,Component", + "name" : "Tensor Data Base", + "size" : 0 + }, { + "id" : "20", + "tags" : "Element,Component", + "name" : "gRPC Client", + "relationships" : [ { + "id" : "39", + "tags" : "Relationship", + "sourceId" : "20", + "destinationId" : "17", + "description" : "Invokes some tasks for the round" + } ], + "size" : 0 + }, { + "id" : "19", + "tags" : "Element,Component", + "name" : "TensorCodec", + "size" : 0 + }, { + "id" : "17", + "tags" : "Element,Component", + "name" : "Task Runner", + "size" : 0 + } ] + }, { + "id" : "6", + "tags" : "Element,Container", + "name" : "Python API component", + "description" : "A set of tools to set up and register FL Experiments", + "relationships" : [ { + "id" : "30", + "tags" : "Relationship", + "sourceId" : "6", + "destinationId" : "9", + "description" : "Registers FL experiments" + } ], + "components" : [ { + "id" : "8", + "tags" : "Element,Component", + "name" : "Experiment Interface", + "size" : 0 + }, { + "id" : "7", + "tags" : "Element,Component", + "name" : "Federation Interface", + "size" : 0 + } ] + }, { + "id" : "13", + "tags" : "Element,Container", + "name" : "Envoy", + "description" : "A long-living entity that can adapt a local data set and spawn collaborators", + "relationships" : [ { + "id" : "34", + "tags" : "Relationship", + "sourceId" : "13", + "destinationId" : "9", + "description" : "Communicates dataset info, Sends status updates" + }, { + "id" : "38", + "tags" : "Relationship", + "sourceId" : "13", + "destinationId" : "17", + "description" : "Provides tasks' definitions" + }, { + "id" : "33", + "tags" : "Relationship", + "sourceId" : "13", + "destinationId" : "15", + "description" : "Creates an instance to maintain an FL experiment" + } ], + "group" : "Collaborator node", + "components" : [ { + "id" : "14", + "tags" : "Element,Component,Interface", + "name" : "Shard Descriptor", + "description" : "Data manager's interface aimed to unify data 
access", + "size" : 0 + } ] + }, { + "id" : "9", + "tags" : "Element,Container", + "name" : "Director", + "description" : "A long-living entity that can spawn aggregators", + "relationships" : [ { + "id" : "31", + "tags" : "Relationship", + "sourceId" : "9", + "destinationId" : "6", + "description" : "Sends information about the Federation. Returns training artifacts." + }, { + "id" : "35", + "tags" : "Relationship", + "sourceId" : "9", + "destinationId" : "13", + "description" : "Approves, Sends FL experiments" + }, { + "id" : "32", + "tags" : "Relationship", + "sourceId" : "9", + "destinationId" : "10", + "description" : "Creates an instance to maintain an FL experiment" + } ], + "group" : "Central node" + } ] + } ], + "customElements" : [ { + "id" : "22", + "tags" : "Element", + "name" : "Config file" + } ] + }, + "documentation" : { }, + "views" : { + "systemContextViews" : [ { + "softwareSystemId" : "5", + "key" : "SystemContext", + "paperSize" : "A4_Landscape", + "dimensions" : { + "width" : 3358, + "height" : 1454 + }, + "automaticLayout" : { + "implementation" : "Graphviz", + "rankDirection" : "TopBottom", + "rankSeparation" : 300, + "nodeSeparation" : 300, + "edgeSeparation" : 0, + "vertices" : false + }, + "enterpriseBoundaryVisible" : true, + "elements" : [ { + "id" : "1", + "x" : 2604, + "y" : 277 + }, { + "id" : "2", + "x" : 1854, + "y" : 277 + }, { + "id" : "3", + "x" : 1104, + "y" : 277 + }, { + "id" : "4", + "x" : 354, + "y" : 277 + }, { + "id" : "5", + "x" : 1479, + "y" : 877 + } ], + "relationships" : [ { + "id" : "29" + }, { + "id" : "27" + }, { + "id" : "24", + "vertices" : [ { + "x" : 954, + "y" : 681 + } ] + }, { + "id" : "23", + "vertices" : [ { + "x" : 2454, + "y" : 681 + } ] + } ] + } ], + "containerViews" : [ { + "softwareSystemId" : "5", + "key" : "Containers", + "dimensions" : { + "width" : 3104, + "height" : 2546 + }, + "externalSoftwareSystemBoundariesVisible" : true, + "elements" : [ { + "id" : "1", + "x" : 890, + "y" : 200 + }, { + 
"id" : "13", + "x" : 1740, + "y" : 1320 + }, { + "id" : "2", + "x" : 2470, + "y" : 1265 + }, { + "id" : "3", + "x" : 230, + "y" : 1270 + }, { + "id" : "15", + "x" : 1740, + "y" : 1855 + }, { + "id" : "6", + "x" : 880, + "y" : 760 + }, { + "id" : "9", + "x" : 880, + "y" : 1320 + }, { + "id" : "10", + "x" : 880, + "y" : 1855 + } ], + "relationships" : [ { + "id" : "28" + }, { + "id" : "26" + }, { + "id" : "37", + "vertices" : [ { + "x" : 1535, + "y" : 1940 + } ] + }, { + "id" : "25" + }, { + "id" : "36", + "vertices" : [ { + "x" : 1565, + "y" : 2090 + } ] + }, { + "id" : "35", + "vertices" : [ { + "x" : 1550, + "y" : 1530 + } ] + }, { + "id" : "34", + "vertices" : [ { + "x" : 1530, + "y" : 1360 + } ] + }, { + "id" : "33" + }, { + "id" : "32" + }, { + "id" : "31", + "vertices" : [ { + "x" : 1215, + "y" : 1185 + } ] + }, { + "id" : "30", + "vertices" : [ { + "x" : 995, + "y" : 1175 + } ] + } ] + } ], + "componentViews" : [ { + "key" : "Collaborator", + "automaticLayout" : { + "implementation" : "Graphviz", + "rankDirection" : "TopBottom", + "rankSeparation" : 300, + "nodeSeparation" : 300, + "edgeSeparation" : 0, + "vertices" : false + }, + "containerId" : "15", + "externalContainerBoundariesVisible" : true, + "elements" : [ { + "id" : "13", + "x" : 0, + "y" : 0 + }, { + "id" : "16", + "x" : 0, + "y" : 0 + }, { + "id" : "17", + "x" : 0, + "y" : 0 + }, { + "id" : "18", + "x" : 0, + "y" : 0 + }, { + "id" : "19", + "x" : 0, + "y" : 0 + }, { + "id" : "20", + "x" : 0, + "y" : 0 + }, { + "id" : "21", + "x" : 0, + "y" : 0 + }, { + "id" : "10", + "x" : 0, + "y" : 0 + } ], + "relationships" : [ { + "id" : "40" + }, { + "id" : "38" + }, { + "id" : "39" + } ] + }, { + "key" : "API", + "automaticLayout" : { + "implementation" : "Graphviz", + "rankDirection" : "TopBottom", + "rankSeparation" : 300, + "nodeSeparation" : 300, + "edgeSeparation" : 0, + "vertices" : false + }, + "containerId" : "6", + "externalContainerBoundariesVisible" : true, + "elements" : [ { + "id" : "7", + "x" : 
0, + "y" : 0 + }, { + "id" : "8", + "x" : 0, + "y" : 0 + } ] + }, { + "key" : "Envoy", + "automaticLayout" : { + "implementation" : "Graphviz", + "rankDirection" : "TopBottom", + "rankSeparation" : 300, + "nodeSeparation" : 300, + "edgeSeparation" : 0, + "vertices" : false + }, + "containerId" : "13", + "externalContainerBoundariesVisible" : true, + "elements" : [ { + "id" : "14", + "x" : 0, + "y" : 0 + } ] + } ], + "configuration" : { + "branding" : { }, + "styles" : { }, + "themes" : [ "https://static.structurizr.com/themes/default/theme.json" ], + "terminology" : { }, + "lastSavedView" : "Containers" + } + } +} \ No newline at end of file diff --git a/openfl/plugins/frameworks_adapters/framework_adapter_interface.py b/openfl/plugins/frameworks_adapters/framework_adapter_interface.py index d9a118ba3b..6107727176 100644 --- a/openfl/plugins/frameworks_adapters/framework_adapter_interface.py +++ b/openfl/plugins/frameworks_adapters/framework_adapter_interface.py @@ -16,7 +16,7 @@ def serialization_setup(): pass @staticmethod - def get_tensor_dict(model, optimizer=None): + def get_tensor_dict(model, optimizer=None) -> dict: """ Extract tensor dict from a model and an optimizer.