securefederatedai · alexey-gruzdev · Jul 30, 2021 · Apr 23, 2021 · May 20, 2021 · May 21, 2021
diff --git a/.gitignore b/.gitignore
@@ -1,8 +1,13 @@
 *.egg-info
+*.pkl
 __pycache__
 /build
 /dist
 .vscode
 .ipynb_checkpoints
 venv/*
 .idea
+
+*.jpg
+*.crt
+*.key
diff --git a/docs/advanced_topics.rst b/docs/advanced_topics.rst
@@ -14,5 +14,6 @@ Advanced Topics
    compression_settings
    overriding_agg_fn
    bash_autocomplete_activation
+   log_metric_callback
 
 
diff --git a/docs/log_metric_callback.rst b/docs/log_metric_callback.rst
@@ -0,0 +1,69 @@
+.. # Copyright (C) 2020-2021 Intel Corporation
+.. # SPDX-License-Identifier: Apache-2.0
+
+.. _log_metric_callback:
+===============================
+Metric logging callback
+===============================
+
+-------------------------------
+Usage
+-------------------------------
+|productName| allows developers to use custom metric logging functions. Such a function is called on the aggregator node.
+To define such a function, do the following:
+
+Python API
+==========
+Define a function with the following signature:
+
+.. code-block:: python
+
+    def callback_name(node_name, task_name, metric_name, metric, round_number):
+        """
+        Write metric callback 
+
+        Args:
+            node_name (str): Name of the node that generated the metric
+            task_name (str): Name of task
+            metric_name (str): Name of metric 
+            metric (np.ndarray): Metric value
+            round_number (int): Round number
+        """
+        your code 
+CLI
+====
+
+Define the callback function in the ``src`` folder of your workspace, in the same way as for the Python API. Then provide the path to your function in the ``aggregator`` section of the ``plan/plan.yaml`` file in your workspace, using the ``log_metric_callback`` key:
+
+.. code-block:: yaml
+
+  aggregator :
+    defaults : plan/defaults/aggregator.yaml
+    template : openfl.component.Aggregator
+    settings :
+      init_state_path     : save/torch_cnn_mnist_init.pbuf
+      best_state_path     : save/torch_cnn_mnist_best.pbuf
+      last_state_path     : save/torch_cnn_mnist_last.pbuf
+      rounds_to_train     : 10
+      log_metric_callback :
+        template : src.mnist_utils.callback_name
+
+
+
+Example
+=======================
+
+Below is an example of a log metric callback, which writes metric values to tensorboard
+
+.. code-block:: python
+
+    from torch.utils.tensorboard import SummaryWriter
+
+    writer = SummaryWriter('./logs/cnn_mnist', flush_secs=5)
+
+
+    def write_metric(node_name, task_name, metric_name, metric, round_number):
+        writer.add_scalar("{}/{}/{}".format(node_name, task_name, metric_name),
+                        metric, round_number) 
+
+Full implementation can be found in ``openfl-tutorials/Federated_Pytorch_MNIST_Tutorial.ipynb`` and in ``torch_cnn_mnist`` workspace
diff --git a/docs/mermaid/pki_scheme.mmd b/docs/mermaid/pki_scheme.mmd
@@ -0,0 +1,20 @@
+sequenceDiagram
+Title: Collaborator Certificate Signing Flow
+  participant A as Aggregator
+  participant CA as CA
+  participant C as Collaborator
+  CA->>CA: 1. Create CA:<br>`step ca init --password-file pass_file`
+  CA->>CA: 2. Start HTTPS CA server:<br>`step-ca ca_config.json`
+  CA->>CA: 3. Generate JWK pair:<br>`step crypto jwk create pub.json priv.json --password-file pass_file`
+  CA->>CA: 4. Get JWT for aggregator:<br>`step ca token localhost --key priv.json --password-file pass_file --ca-url ca_url`
+  CA->>A: 5. Copy JWT to aggregator. 
+  A->>CA: 6. Certify node:<br>`step ca certificate localhost agg.crt agg.key --token AbC1d2E..`
+  Note over A,CA: Get agg.crt
+  CA->>CA: 7. Get JWT for collaborator:<br>`step ca token col_name --key priv.json --password-file pass_file --ca-url ca_url`
+  CA->>C: 8. Copy JWT to collaborator. 
+  C->>CA: 9. Certify node:<br>`step ca certificate col_name col_name.crt col_name.key --token AbC1d2E..`
+  Note over C,CA: Get col_name.crt
+  CA->>A: 10. Copy root_ca.crt to aggregator
+  Note over A,CA: This could be done at step 5 with token
+  CA->>C: 11. Copy root_ca.crt to collaborator
+  Note over C,CA: This could be done at step 8 with token
diff --git a/docs/running_the_federation.certificates.rst b/docs/running_the_federation.certificates.rst
@@ -11,7 +11,8 @@ Therefore, security keys and certificates will need to be created for the
 aggregator and collaborators
 to negotiate the connection securely. For the :ref:`Hello Federation <running_the_federation>` demo
 we will run the aggregator and collaborators on the same localhost server
-so these configuration steps just need to be done once on that machine.
+so these configuration steps just need to be done once on that machine. Two PKI workflows
+are available: manual and semi-automatic (with step-ca).
 
     .. note::
 
@@ -20,10 +21,16 @@ so these configuration steps just need to be done once on that machine.
 .. _install_certs:
 
 .. kroki:: mermaid/CSR_signing.mmd
-    :caption: Certificate generation and signing
+    :caption: Manual certificate generation and signing
     :align: center
     :type: mermaid
 
+.. _install_certs_step_ca:
+
+.. kroki:: mermaid/pki_scheme.mmd
+    :caption: Step-ca certificate generation and signing
+    :align: center
+    :type: mermaid
 
 
 .. _install_certs_agg:

diff --git a/docs/running_the_federation.interactive_api.rst b/docs/running_the_federation.interactive_api.rst
@@ -23,6 +23,58 @@ Python Environment
 ===================
 Create a virtual Python environment. Please, install only packages that are required for conducting the experiment, since Python environment will be replicated on collaborator nodes.
 
+******************************************
+Certification
+******************************************
+If you have a trusted workspace and the connection does not need to be encrypted, you can use the :code:`disable_tls` option when starting the experiment.
+Otherwise, it is necessary to certify each node participating in the federation. Certificates make it possible to use mutual TLS connections between nodes.
+You can certify nodes with your own PKI system or use the PKI provided by OpenFL. It is based on `step-ca <https://github.com/smallstep/certificates>`_
+as the server and `step <https://github.com/smallstep/cli>`_ as the client utility. They are downloaded from GitHub during workspace setup. Regardless of the certification method,
+paths to the certificates on each node are provided at the start of the experiment. The PKI workflow provided by OpenFL is described below.
+
+OpenFL PKI workflow
+===================
+The OpenFL PKI pipeline assumes creating a local CA with an HTTPS server that listens for signing requests.
+Certificates for each node can be signed by sending a request to the CA server with a special token.
+The token must be copied to each node in a secure way. Each step is described in detail below.
+
+1. Create the CA, i.e. create the root key pair, the CA server config and other CA files.
+    .. code-block:: console
+
+       $ fx pki install -p </path/to/ca/dir> --password <123> --ca-url <host:port>
+    | :code:`-p` - path to the folder that will contain the CA files.
+    | :code:`--password` - password that will be used to encrypt some of the CA files.
+    | :code:`--ca-url` - host and port on which the CA server will listen.
+    This command will also download the `step-ca <https://github.com/smallstep/certificates>`_ and `step <https://github.com/smallstep/cli>`_ binaries from GitHub.
+
+2. Run CA https server.
+    .. code-block:: console
+
+       $ fx pki run -p </path/to/ca/dir>
+    | :code:`-p` - path to the folder that contains the CA files.
+
+3. Get token for some node.
+
+    .. code-block:: console
+
+       $ fx pki get-token -n <subject>
+    | :code:`-n` - subject name, fqdn for director, collaborator name for envoy or api name for api-layer node
+
+    Run this command on the CA side, from the CA folder. The output is a token that contains a JWT (JSON Web Token) from the CA server and the CA
+    root certificate concatenated together. This JWT has a twenty-four-hour time-to-live.
+
+4. Copy the token to the node side (director or envoy) over a secure channel and run the certify command.
+    .. code-block:: console
+
+       $ fx pki certify -n <subject> -t <token>
+    | :code:`-n` - subject name, fqdn for director, collaborator name for envoy or api name for api-layer node
+    | :code:`-t` - token output by the previous command
+    This command calls the step client to connect to the CA server over HTTPS.
+    HTTPS is made possible by the root certificate that was copied together with the JWT.
+    The server authenticates the client by the JWT, and the client authenticates the server by the root certificate.
+
+Now the signed certificate and private key are stored on the current node. The signed certificate has a one-year time-to-live. You should certify all nodes that will participate in the federation: the director, all envoys and the api-layer node.
+
 ******************************************
 Defining a Federated Learning Experiment
 ******************************************

diff --git a/openfl-tutorials/Federated_FedProx_PyTorch_MNIST_Tutorial.ipynb b/openfl-tutorials/Federated_FedProx_PyTorch_MNIST_Tutorial.ipynb
@@ -40,7 +40,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "After importing the required packages, the next step is setting up our openfl workspace. To do this, simply run the `fx.init()` command as follows:"
+    "After importing the required packages, the next step is setting up our openfl workspace. To do this, simply run the `fx.init()` command as follows:"
    ]
   },
   {

diff --git a/openfl-tutorials/Federated_PyTorch_UNET_Tutorial.ipynb b/openfl-tutorials/Federated_PyTorch_UNET_Tutorial.ipynb
@@ -135,8 +135,11 @@
     "    def __init__(self, data_path, collaborator_count, collaborator_num, is_validation):\n",
     "        self.images_path = './data/segmented-images/images/'\n",
     "        self.masks_path = './data/segmented-images/masks/'\n",
-    "        self.images_names = [img_name for img_name in sorted(listdir(\n",
-    "            self.images_path)) if len(img_name) > 3 and img_name[-3:] == 'jpg']\n",
+    "        self.images_names = [\n",
+    "            img_name\n",
+    "            for img_name in sorted(listdir(self.images_path))\n",
+    "            if len(img_name) > 3 and img_name[-3:] == 'jpg'\n",
+    "        ]\n",
     "\n",
     "        self.images_names = self.images_names[collaborator_num:: collaborator_count]\n",
     "        self.is_validation = is_validation\n",
@@ -267,9 +270,9 @@
     "    return score.sum()\n",
     "\n",
     "\n",
-    "class double_conv(nn.Module):\n",
+    "class DoubleConv(nn.Module):\n",
     "    def __init__(self, in_ch, out_ch):\n",
-    "        super(double_conv, self).__init__()\n",
+    "        super(DoubleConv, self).__init__()\n",
     "        self.in_ch = in_ch\n",
     "        self.out_ch = out_ch\n",
     "        self.conv = nn.Sequential(\n",
@@ -286,33 +289,36 @@
     "        return x\n",
     "\n",
     "\n",
-    "class down(nn.Module):\n",
+    "class Down(nn.Module):\n",
     "    def __init__(self, in_ch, out_ch):\n",
-    "        super(down, self).__init__()\n",
+    "        super(Down, self).__init__()\n",
     "        self.mpconv = nn.Sequential(\n",
-    "            nn.MaxPool2d(2), double_conv(in_ch, out_ch))\n",
+    "            nn.MaxPool2d(2),\n",
+    "            DoubleConv(in_ch, out_ch)\n",
+    "        )\n",
     "\n",
     "    def forward(self, x):\n",
     "        x = self.mpconv(x)\n",
     "        return x\n",
     "\n",
     "\n",
-    "class up(nn.Module):\n",
+    "class Up(nn.Module):\n",
     "    def __init__(self, in_ch, out_ch, bilinear=False):\n",
-    "        super(up, self).__init__()\n",
+    "        super(Up, self).__init__()\n",
     "        self.in_ch = in_ch\n",
     "        self.out_ch = out_ch\n",
     "        if bilinear:\n",
-    "            self.up = nn.Upsample(\n",
-    "                scale_factor=2, mode=\"bilinear\", align_corners=True)\n",
-    "        else:\n",
-    "            self.up = nn.ConvTranspose2d(\n",
-    "                in_ch, in_ch // 2, 2, stride=2\n",
+    "            self.Up = nn.Upsample(\n",
+    "                scale_factor=2,\n",
+    "                mode=\"bilinear\",\n",
+    "                align_corners=True\n",
     "            )\n",
-    "        self.conv = double_conv(in_ch, out_ch)\n",
+    "        else:\n",
+    "            self.Up = nn.ConvTranspose2d(in_ch, in_ch // 2, 2, stride=2)\n",
+    "        self.conv = DoubleConv(in_ch, out_ch)\n",
     "\n",
     "    def forward(self, x1, x2):\n",
-    "        x1 = self.up(x1)\n",
+    "        x1 = self.Up(x1)\n",
     "        diffY = x2.size()[2] - x1.size()[2]\n",
     "        diffX = x2.size()[3] - x1.size()[3]\n",
     "\n",
@@ -327,15 +333,15 @@
     "class UNet(nn.Module):\n",
     "    def __init__(self, n_channels=3, n_classes=1):\n",
     "        super().__init__()\n",
-    "        self.inc = double_conv(n_channels, 64)\n",
-    "        self.down1 = down(64, 128)\n",
-    "        self.down2 = down(128, 256)\n",
-    "        self.down3 = down(256, 512)\n",
-    "        self.down4 = down(512, 1024)\n",
-    "        self.up1 = up(1024, 512)\n",
-    "        self.up2 = up(512, 256)\n",
-    "        self.up3 = up(256, 128)\n",
-    "        self.up4 = up(128, 64)\n",
+    "        self.inc = DoubleConv(n_channels, 64)\n",
+    "        self.down1 = Down(64, 128)\n",
+    "        self.down2 = Down(128, 256)\n",
+    "        self.down3 = Down(256, 512)\n",
+    "        self.down4 = Down(512, 1024)\n",
+    "        self.up1 = Up(1024, 512)\n",
+    "        self.up2 = Up(512, 256)\n",
+    "        self.up3 = Up(256, 128)\n",
+    "        self.up4 = Up(128, 64)\n",
     "        self.outc = nn.Conv2d(64, n_classes, 1)\n",
     "\n",
     "    def forward(self, x):\n",
@@ -558,4 +564,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
+}
diff --git a/openfl-tutorials/Federated_Pytorch_MNIST_Tutorial.ipynb b/openfl-tutorials/Federated_Pytorch_MNIST_Tutorial.ipynb
@@ -49,7 +49,7 @@
    "outputs": [],
    "source": [
     "#Setup default workspace, logging, etc.\n",
-    "fx.init('torch_cnn_mnist')"
+    "fx.init('torch_cnn_mnist', log_level='METRIC', log_file='./spam_metric.log')"
    ]
   },
   {
@@ -124,6 +124,29 @@
     "    return F.cross_entropy(input=output,target=target)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here we can define a metric logging function. It should have the signature shown below. You can use it to write metrics to TensorBoard or to some other specific logger."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from torch.utils.tensorboard import SummaryWriter\n",
+    "\n",
+    "writer = SummaryWriter('./logs/cnn_mnist', flush_secs=5)\n",
+    "\n",
+    "\n",
+    "def write_metric(node_name, task_name, metric_name, metric, round_number):\n",
+    "    writer.add_scalar(\"{}/{}/{}\".format(node_name, task_name, metric_name),\n",
+    "                      metric, round_number)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -205,8 +228,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "#Run experiment, return trained FederatedModel\n",
-    "final_fl_model = fx.run_experiment(collaborators,{'aggregator.settings.rounds_to_train':5})"
+    "# Run experiment, return trained FederatedModel\n",
+    "\n",
+    "final_fl_model = fx.run_experiment(collaborators, override_config={\n",
+    "    'aggregator.settings.rounds_to_train': 5,\n",
+    "    'aggregator.settings.log_metric_callback': write_metric,\n",
+    "})"
    ]
   },
   {

diff --git a/...torials/interactive_api/Director_Pytorch_Kvasir_UNET/director_folder/director_config.yaml b/...torials/interactive_api/Director_Pytorch_Kvasir_UNET/director_folder/director_config.yaml
@@ -0,0 +1,4 @@
+settings:
+  listen_ip: localhost
+  sample_shape: ['300', '400', '3']
+  target_shape: ['300', '400']
diff --git a/...-tutorials/interactive_api/Director_Pytorch_Kvasir_UNET/director_folder/start_director.sh b/...-tutorials/interactive_api/Director_Pytorch_Kvasir_UNET/director_folder/start_director.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+set -e
+
+fx director start --disable-tls -c director_config.yaml
diff --git a/...s/interactive_api/Director_Pytorch_Kvasir_UNET/director_folder/start_director_with_tls.sh b/...s/interactive_api/Director_Pytorch_Kvasir_UNET/director_folder/start_director_with_tls.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+set -e
+FQDN=$1
+fx director start -c director_config.yaml -rc cert/root_ca.crt -pk cert/"${FQDN}".key -oc cert/"${FQDN}".crt