# Model Preparation
A notebook that aids the preparation of a CNN.

In [1]:
# Imports
import os, sys
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import confusion_matrix, average_precision_score, accuracy_score, precision_score, recall_score
models = tf.keras.models  # like 'from tensorflow.keras import models' (PyCharm import issue workaround)
layers = tf.keras.layers  # like 'from tensorflow.keras import layers' (PyCharm import issue workaround)
optimizers = tf.keras.optimizers  # like 'from tensorflow.keras import optimizers' (PyCharm import issue workaround)

# Put the parent of the current working directory on the import path so that
# the project-local `Scripts` package imported below can be resolved.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from Scripts import Data_Loader_Functions as Data_Loader
from Scripts import Centralized_Pain_CNN as painCNN
from Scripts.Experiments import experiment_federated_pain

In [2]:
# Load data
# train_path = os.path.join(cNN.ROOT, "Data", "Augmented Data", "Pain Two-Step Augmentation", "group_1")
test_path = os.path.join(module_path, "Data", "Augmented Data", "Pain Two-Step Augmentation", "group_2_test")
train_path_add_data = os.path.join(module_path, "Data", "Augmented Data", "Pain Two-Step Augmentation",
                                   "group_2_train")
test_data, test_labels = Data_Loader.load_pain_data(test_path)

# Define labels for training
person = 0
label = 4  # Labels: [person, session, culture, frame, pain, Trans_1, Trans_2]

# Prepare labels for training and evaluation
# The builtin `int` is used as dtype: the `np.int` alias was deprecated in NumPy 1.20 and removed in NumPy 1.24.
# It was only ever another name for the builtin, so the resulting dtype is unchanged.
# train_labels_ord = train_labels[:, label].astype(int)
# train_labels_bin = Data_Loader.reduce_pain_label_categories(train_labels_ord, max_pain=1)
test_labels_ord = test_labels[:, label].astype(int)
test_labels_bin = Data_Loader.reduce_pain_label_categories(test_labels_ord, max_pain=1)
# test_labels_people = test_labels[:, person].astype(int)

# Load Pretrained model
# NOTE(review): absolute, machine-specific path; this fails wherever that directory does not exist. Presumably the
# file lives under os.path.join(module_path, "Models", "Pain", "Centralized") - confirm and switch to that.
model = tf.keras.models.load_model('/Users/nico/PycharmProjects/FederatedLearning/Models/Pain/Centralized/'
                                   '2019-07-23-051453_Centralized_PAIN_Centralized-Training.h5')

# Load additional data
add_train_data, add_train_labels = Data_Loader.load_pain_data(train_path_add_data)
add_train_labels_ord = add_train_labels[:, label].astype(int)
train_labels_bin = Data_Loader.reduce_pain_label_categories(add_train_labels_ord, max_pain=1)
# NOTE(review): despite the "test" in its name, this holds the person column of the additional *training* labels.
add_test_labels_people = add_train_labels[:, person].astype(int)

# Split Data into shards
# The same section count is applied to all three arrays, so shard i of the data, the binary labels and the person
# ids always covers the same index range.
split = 6
add_train_data = np.array_split(add_train_data, split)
train_labels_bin = np.array_split(train_labels_bin, split)
add_test_labels_people = np.array_split(add_test_labels_people, split)

0 images processed
1000 images processed
2000 images processed
3000 images processed
4000 images processed
5000 images processed
6000 images processed
7000 images processed
8000 images processed
9000 images processed


W0724 00:14:20.144496 4775114176 deprecation.py:323] From /Users/nico/PycharmProjects/FederatedLearning/venv/lib/python3.7/site-packages/tensorflow/python/keras/metrics.py:1792: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


0 images processed
1000 images processed
2000 images processed
3000 images processed
4000 images processed
5000 images processed
6000 images processed
7000 images processed
8000 images processed
9000 images processed
10000 images processed
11000 images processed
12000 images processed
13000 images processed
14000 images processed


In [3]:
# Settings handed to experiment_federated_pain by the training cell.
dataset = 'PAIN'
experiment = 'TEST'  # base experiment name; the training cell appends a "_shard-<n>" suffix
clients = 10
rounds = 1

In [4]:
# Train on additional shards and evaluate performance
# One federated experiment is run per shard. Shards are numbered from 1 and the number is appended to the
# experiment name ("<experiment>_shard-<n>"). enumerate() keeps that number in lock-step with the loop.
for shard_counter, (data, labels, people) in enumerate(
        zip(add_train_data, train_labels_bin, add_test_labels_people), start=1):
    experiment_new = experiment + "_shard-{}".format(shard_counter)
    experiment_federated_pain(clients, dataset, experiment_new, data, labels, test_data, test_labels_bin, rounds,
                              people=people)


----------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------- Communication Round 1 --------------------------------------------------------

----------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------- Client 0 --------------------------------------------------------------
Train on 247 samples

----------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------- Client 1 --------------------------------------------------------------
Train on 247 samples

---------------------------------------------------------------------------------------------------------------------------------------

ValueError: all the input array dimensions except for the concatenation axis must match exactly

In [50]:
# Toy input defined inside the cell so that it also runs on a fresh kernel
# (the integers 0..99, matching the output recorded for this cell).
x = np.arange(100)
# Explicit split indices at 1% and 10% of the input: np.array_split cuts before each index,
# producing x[:1], x[1:10] and x[10:].
split = [int(0.01 * len(x)), int(0.1 * len(x))]
np.array_split(x, split)

[array([0]),
 array([1, 2, 3, 4, 5, 6, 7, 8, 9]),
 array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
        27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
        44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
        61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
        78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94,
        95, 96, 97, 98, 99])]

In [69]:
def split_data_into_shards(data, labels, split):
    """
    Utility function, splitting data and labels into consecutive, non-overlapping shards. The split points are
    rescaled so that the largest one corresponds to 100% of the data.

    :param data:                numpy array, split along its first axis
    :param labels:              numpy array with the same length as data
    :param split:               list of percentage split points, e.g. [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6],
                                expected in ascending order. The final point serves for scaling only, i.e. no array
                                will be split after that point. An empty list yields one shard holding everything.
    :return:                    tuple (data_shards, label_shards): two lists of equal length whose i-th entries
                                cover the same index range
    :raises ValueError:         if data and labels differ in length, or if the largest split point is not positive
    """
    total = len(data)
    if total != len(labels):
        # Cut indices are derived from len(data) only; unequal lengths would silently misalign the shards.
        raise ValueError("data and labels must have the same length, got {} and {}".format(total, len(labels)))

    if len(split) == 0:
        # Nothing to cut: np.array_split then returns a single section.
        cut_points = []
    else:
        scale = max(split)
        if scale <= 0:
            raise ValueError("the largest split point must be positive, got {}".format(scale))
        # Rescale every point to an absolute index, then drop the last one (it is the scaling reference only).
        cut_points = [int(point / scale * total) for point in split][:-1]

    data_shards = np.array_split(data, cut_points)
    label_shards = np.array_split(labels, cut_points)
    return data_shards, label_shards

In [70]:
def cumconc(array):
    """
    Cumulative concatenation: entry i of the result is the concatenation of array[0] ... array[i].

    :param array:               sequence of numpy arrays (shards) that np.concatenate can join along the first axis
    :return:                    numpy array with one entry per shard. If the cumulative pieces differ in shape, a
                                1-D object array holding them is returned; if they all share one shape (e.g. a
                                single shard), they are stacked into a regular array
    """
    total = np.concatenate(array)
    # Running end index of each shard inside the concatenated array.
    ends = np.cumsum([len(shard) for shard in array])
    pieces = [total[:end] for end in ends]

    if len({piece.shape for piece in pieces}) == 1:
        return np.array(pieces)

    # Ragged pieces: fill an object array explicitly, since np.array(pieces) raises ValueError on NumPy >= 1.24
    # for sequences of differently shaped arrays.
    result = np.empty(len(pieces), dtype=object)
    for idx, piece in enumerate(pieces):
        result[idx] = piece
    return result

In [76]:
# Toy inputs for trying out split_data_into_shards: 100 float samples, 100 integer labels (100..199)
# and a list of split points.
split = [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
labels = np.arange(100, 200)
data = 1.1 * np.arange(100)

In [77]:
# Split the toy data and labels into shards. NOTE: `data` and `labels` are rebound from arrays to lists of shard
# arrays, so this cell is not idempotent - re-run the cell that defines them before running this one again.
data, labels = split_data_into_shards(data, labels, split)

array([array([0.]), array([0. , 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7]),
       array([ 0. ,  1.1,  2.2,  3.3,  4.4,  5.5,  6.6,  7.7,  8.8,  9.9, 11. ,
       12.1, 13.2, 14.3, 15.4, 16.5]),
       array([ 0. ,  1.1,  2.2,  3.3,  4.4,  5.5,  6.6,  7.7,  8.8,  9.9, 11. ,
       12.1, 13.2, 14.3, 15.4, 16.5, 17.6, 18.7, 19.8, 20.9, 22. , 23.1,
       24.2, 25.3, 26.4, 27.5, 28.6, 29.7, 30.8, 31.9, 33. , 34.1, 35.2]),
       array([ 0. ,  1.1,  2.2,  3.3,  4.4,  5.5,  6.6,  7.7,  8.8,  9.9, 11. ,
       12.1, 13.2, 14.3, 15.4, 16.5, 17.6, 18.7, 19.8, 20.9, 22. , 23.1,
       24.2, 25.3, 26.4, 27.5, 28.6, 29.7, 30.8, 31.9, 33. , 34.1, 35.2,
       36.3, 37.4, 38.5, 39.6, 40.7, 41.8, 42.9, 44. , 45.1, 46.2, 47.3,
       48.4, 49.5, 50.6, 51.7, 52.8, 53.9]),
       array([ 0. ,  1.1,  2.2,  3.3,  4.4,  5.5,  6.6,  7.7,  8.8,  9.9, 11. ,
       12.1, 13.2, 14.3, 15.4, 16.5, 17.6, 18.7, 19.8, 20.9, 22. , 23.1,
       24.2, 25.3, 26.4, 27.5, 28.6, 29.7, 30.8, 31.9, 33. , 34.1, 35.2,
       36.3, 3