securefederatedai · alexey-gruzdev · Dec 9, 2021 · Dec 2, 2021 · Dec 3, 2021 · Dec 7, 2021
diff --git a/openfl-tutorials/interactive_api/PyTorch_MVTec_PatchSVDD/README.md b/openfl-tutorials/interactive_api/PyTorch_MVTec_PatchSVDD/README.md
@@ -0,0 +1,48 @@
+# Anomaly Detection with PatchSVDD for MVTec Dataset
+
+![MVTec AD objects](https://www.mvtec.com/fileadmin/Redaktion/mvtec.com/company/research/datasets/dataset_overview_large.png "MVTec AD objects")
+
+
+### 1. About dataset
+MVTec AD is a dataset for benchmarking anomaly detection methods with a focus on industrial inspection. It contains over 5000 high-resolution images divided into fifteen different object and texture categories. Each class contains
+60 to 390 normal train images (defect free) and 40 to 167 test images (with various kinds of defects as well as images without defects). More info at [MVTec dataset](https://www.mvtec.com/company/research/datasets/mvtec-ad).
+For each object, the data is divided into 3 folders - 'train' (containing defect-free training images), 'test' (containing test images, both good and bad), and 'ground_truth' (containing the masks of the defective images).
+
+### 2. About model
+Two neural networks are used: an encoder and a classifier. The encoder is composed of convolutional layers only. The classifier is a two-layer MLP with 128 hidden units per layer, and its input is the difference between the features of the two patches. The activation function for both networks is LeakyReLU with α = 0.1.
+The encoder has a hierarchical structure. The receptive field of the encoder is K = 64, and that of the embedded smaller encoder is K = 32. Patch SVDD divides the images into patches with a size K and a stride S. The values for the strides are S = 16 and S = 4 for the encoders with K = 64 and K = 32, respectively.
+
+### 3. Links
+* [Original paper](https://arxiv.org/abs/2006.16067)
+* [Original Github code](https://github.com/nuclearboy95/Anomaly-Detection-PatchSVDD-PyTorch/tree/934d6238e5e0ad511e2a0e7fc4f4899010e7d892)
+* [MVTec AD dataset download link](https://www.mydrive.ch/shares/38536/3830184030e49fe74747669442f0f282/download/420938113-1629952094/mvtec_anomaly_detection.tar.xz)
+
+
+### 4. How to run this tutorial (without TLS and locally as a simulation):
+
+Go to the example folder:
+`cd <openfl_folder>/openfl-tutorials/interactive_api/PyTorch_MVTec_PatchSVDD`
+
+1. Run director:
+```sh
+cd director
+bash start_director.sh
+```
+
+2. Run envoy:
+```sh
+cd envoy
+bash start_envoy.sh env_one envoy_config.yaml
+```
+
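+Optional: start a second envoy:
+ - Copy the `envoy` folder to another location, provide a second config file there (`envoy_config_two.yaml` in the command below), and run from that copy: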
+```sh
+bash start_envoy.sh env_two envoy_config_two.yaml
+```
+
+3. Run `PatchSVDD_with_Director.ipynb` jupyter notebook:
+```sh
+cd workspace
+jupyter notebook PatchSVDD_with_Director.ipynb
+```
diff --git a/openfl-tutorials/interactive_api/PyTorch_MVTec_PatchSVDD/director/director_config.yaml b/openfl-tutorials/interactive_api/PyTorch_MVTec_PatchSVDD/director/director_config.yaml
@@ -0,0 +1,5 @@
+# Director settings for the MVTec PatchSVDD tutorial.
+settings:
+  # Address the director listens on; the non-TLS envoy start script connects to localhost:50050.
+  listen_host: localhost
+  listen_port: 50050
+  # Same values as MVTecShardDescriptor.sample_shape / target_shape in the envoy folder.
+  # NOTE(review): presumably envoys reporting different shapes are rejected - confirm.
+  sample_shape: ['256', '256', '3']
+  target_shape: ['256', '256']
diff --git a/openfl-tutorials/interactive_api/PyTorch_MVTec_PatchSVDD/director/start_director.sh b/openfl-tutorials/interactive_api/PyTorch_MVTec_PatchSVDD/director/start_director.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+set -e
+
+fx director start --disable-tls -c director_config.yaml
diff --git a/openfl-tutorials/interactive_api/PyTorch_MVTec_PatchSVDD/director/start_director_with_tls.sh b/openfl-tutorials/interactive_api/PyTorch_MVTec_PatchSVDD/director/start_director_with_tls.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+set -e
+FQDN=$1
+fx director start -c director_config.yaml -rc cert/root_ca.crt -pk cert/"${FQDN}".key -oc cert/"${FQDN}".crt
diff --git a/openfl-tutorials/interactive_api/PyTorch_MVTec_PatchSVDD/envoy/envoy_config.yaml b/openfl-tutorials/interactive_api/PyTorch_MVTec_PatchSVDD/envoy/envoy_config.yaml
@@ -0,0 +1,11 @@
+# Envoy configuration for the MVTec PatchSVDD tutorial.
+params:
+  # NOTE(review): looks like the indices of the CUDA devices this envoy may use;
+  # index 2 will not exist on hosts with fewer than three GPUs - adjust to the machine.
+  cuda_devices: [0,2]
+
+optional_plugin_components: {}
+
+shard_descriptor:
+  # <module>.<class> of the shard descriptor defined in mvtec_shard_descriptor.py.
+  template: mvtec_shard_descriptor.MVTecShardDescriptor
+  # Keyword arguments of MVTecShardDescriptor.__init__.
+  params:
+    # Folder (relative to the working directory) where the dataset is downloaded and unpacked.
+    data_folder: MVTec_data
+    # "<rank>,<worldsize>": this envoy takes every worldsize-th sample starting at rank - 1.
+    rank_worldsize: 1,1
+    # MVTec object category; used as the sub-folder name inside data_folder.
+    obj: bottle
diff --git a/openfl-tutorials/interactive_api/PyTorch_MVTec_PatchSVDD/envoy/mvtec_shard_descriptor.py b/openfl-tutorials/interactive_api/PyTorch_MVTec_PatchSVDD/envoy/mvtec_shard_descriptor.py
@@ -0,0 +1,159 @@
+# Copyright (C) 2020-2021 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+"""MVTec shard descriptor."""
+
+import os
+from glob import glob
+from pathlib import Path
+
+import numpy as np
+from imageio import imread
+from PIL import Image
+
+from openfl.interface.interactive_api.shard_descriptor import ShardDataset
+from openfl.interface.interactive_api.shard_descriptor import ShardDescriptor
+
+
+class MVTecShardDataset(ShardDataset):
+    """MVTec Shard dataset class."""
+
+    def __init__(self, images_path,
+                 mask_path, labels,
+                 rank=1,
+                 worldsize=1):
+        """Initialize MVTecShardDataset."""
+        self.rank = rank
+        self.worldsize = worldsize
+        self.images_path = images_path[self.rank - 1::self.worldsize]
+        self.mask_path = mask_path[self.rank - 1::self.worldsize]
+        self.labels = labels[self.rank - 1::self.worldsize]
+
+    def __getitem__(self, index):
+        """Return a item by the index."""
+        img = np.asarray(imread(self.images_path[index]))
+        if img.shape[-1] != 3:
+            img = self.gray2rgb(img)
+
+        img = self.resize(img)
+        img = np.asarray(img)
+        label = self.labels[index]
+        if self.mask_path[index]:
+            mask = np.asarray(imread(self.mask_path[index]))
+            mask = self.resize(mask)
+            mask = np.asarray(mask)
+        else:
+            mask = np.zeros(img.shape)[:, :, 0]
+        return img, mask, label
+
+    def __len__(self):
+        """Return the len of the dataset."""
+        return len(self.images_path)
+
+    def resize(self, image, shape=(256, 256)):
+        """Resize image."""
+        return np.array(Image.fromarray(image).resize(shape))
+
+    def gray2rgb(self, images):
+        """Change image from gray to rgb."""
+        tile_shape = tuple(np.ones(len(images.shape), dtype=int))
+        tile_shape += (3,)
+
+        images = np.tile(np.expand_dims(images, axis=-1), tile_shape)
+        return images
+
+
+class MVTecShardDescriptor(ShardDescriptor):
+    """MVTec Shard descriptor class."""
+
+    def __init__(self, data_folder: str = 'MVTec_data',
+                 rank_worldsize: str = '1,1',
+                 obj: str = 'bottle'):
+        """Initialize MVTecShardDescriptor."""
+        super().__init__()
+
+        self.dataset_path = Path.cwd() / data_folder
+        self.download_data()
+        self.rank, self.worldsize = tuple(int(num) for num in rank_worldsize.split(','))
+        self.obj = obj
+
+        # Calculating data and target shapes
+        ds = self.get_dataset()
+        sample, masks, target = ds[0]
+        self._sample_shape = [str(dim) for dim in sample.shape]
+        self._target_shape = [str(dim) for dim in target.shape]
+
+    def download_data(self):
+        """Download data."""
+        zip_file_path = self.dataset_path / 'mvtec_anomaly_detection.tar.xz'
+        if not Path(zip_file_path).exists():
+            os.makedirs(self.dataset_path, exist_ok=True)
+            print('Downloading MVTec Dataset...this might take a while')
+            os.system('wget -nc'
+                      " 'https://www.mydrive.ch/shares/38536/3830184030e49fe74747669442f0f282/download/420938113-1629952094/mvtec_anomaly_detection.tar.xz'"  # noqa
+                      f' -O {zip_file_path.relative_to(Path.cwd())}')
+            print('Downloaded MVTec dataset, untar-ring now')
+            os.system(f'tar -xvf {zip_file_path.relative_to(Path.cwd())}'
+                      f' -C {self.dataset_path.relative_to(Path.cwd())}')
+            # change to write permissions
+            self.change_permissions(self.dataset_path, 0o764)
+
+    def change_permissions(self, folder, code):
+        """Change permissions after data is downloaded."""
+        for root, dirs, files in os.walk(folder):
+            for d in dirs:
+                os.chmod(os.path.join(root, d), code)
+            for f in files:
+                os.chmod(os.path.join(root, f), code)
+
+    def get_dataset(self, dataset_type='train'):
+        """Return a shard dataset by type."""
+        # Train dataset
+        if dataset_type == 'train':
+            fpattern = os.path.join(self.dataset_path, f'{self.obj}/train/*/*.png')
+            fpaths = sorted(glob(fpattern))
+            self.images_path = list(fpaths)
+            self.labels = np.zeros(len(fpaths), dtype=np.int32)
+            # Masks
+            self.mask_path = np.full(self.labels.shape, None)
+        # Test dataset
+        elif dataset_type == 'test':
+            fpattern = os.path.join(self.dataset_path, f'{self.obj}/test/*/*.png')
+            fpaths = sorted(glob(fpattern))
+            fpaths_anom = list(
+                filter(lambda fpath: os.path.basename(os.path.dirname(fpath)) != 'good', fpaths))
+            fpaths_good = list(
+                filter(lambda fpath: os.path.basename(os.path.dirname(fpath)) == 'good', fpaths))
+            fpaths = fpaths_anom + fpaths_good
+            self.images_path = fpaths
+            self.labels = np.zeros(len(fpaths_anom) + len(fpaths_good), dtype=np.int32)
+            self.labels[:len(fpaths_anom)] = 1   # anomalies
+            # Masks
+            fpattern_mask = os.path.join(self.dataset_path, f'{self.obj}/ground_truth/*/*.png')
+            self.mask_path = sorted(glob(fpattern_mask)) + [None] * len(fpaths_good)
+        else:
+            raise Exception(f'Wrong dataset type: {dataset_type}.'
+                            f'Choose from the list: [train, test]')
+
+        return MVTecShardDataset(
+            images_path=self.images_path,
+            mask_path=self.mask_path,
+            labels=self.labels,
+            rank=self.rank,
+            worldsize=self.worldsize,
+        )
+
+    @property
+    def sample_shape(self):
+        """Return the sample shape info."""
+        return ['256', '256', '3']
+
+    @property
+    def target_shape(self):
+        """Return the target shape info."""
+        return ['256', '256']
+
+    @property
+    def dataset_description(self) -> str:
+        """Return the shard dataset description."""
+        return (f'MVTec dataset, shard number {self.rank}'
+                f' out of {self.worldsize}')
diff --git a/openfl-tutorials/interactive_api/PyTorch_MVTec_PatchSVDD/envoy/sd_requirements.txt b/openfl-tutorials/interactive_api/PyTorch_MVTec_PatchSVDD/envoy/sd_requirements.txt
@@ -0,0 +1,3 @@
+numpy
+pillow
+imageio
diff --git a/openfl-tutorials/interactive_api/PyTorch_MVTec_PatchSVDD/envoy/start_envoy.sh b/openfl-tutorials/interactive_api/PyTorch_MVTec_PatchSVDD/envoy/start_envoy.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+# Start an envoy without TLS against the director on localhost:50050.
+# Usage: bash start_envoy.sh [envoy_name] [envoy_config_path]
+# Without arguments the previous fixed values are used: env_one and envoy_config.yaml.
+set -e
+ENVOY_NAME=${1:-env_one}
+ENVOY_CONFIG=${2:-envoy_config.yaml}
+
+fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONFIG" -dh localhost -dp 50050
diff --git a/openfl-tutorials/interactive_api/PyTorch_MVTec_PatchSVDD/envoy/start_envoy_with_tls.sh b/openfl-tutorials/interactive_api/PyTorch_MVTec_PatchSVDD/envoy/start_envoy_with_tls.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+set -e
+ENVOY_NAME=$1
+DIRECTOR_FQDN=$2
+
+fx envoy start -n "$ENVOY_NAME" --envoy-config-path envoy_config.yaml -dh"$DIRECTOR_FQDN" -dp 50050 -rc cert/root_ca.crt -pk cert/"$ENVOY_NAME".key -oc cert/"$ENVOY_NAME".crt