<a href="https://colab.research.google.com/github/shubham2279/FL-CATE/blob/main/FL_CAT_Single_Sheet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [21]:
###############   Base FL    ###################
from datetime import datetime
import random as ran
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras import Model
print("Tensorflow version = ", tf. __version__)
# recipe is used to calculate object size in bytes and suggested to be used by
# python documentation
# import sys as re
# pympler is used to calculate object size in bytes
!pip install pympler
from pympler import asizeof # importing the asizeof function specifically
import sys

# *****************************************************************************

# constant variables
# learning rate
ALPHA = 0.1
FLUX = 0.15 # percentage value 0-1
ENERGY = 4 # energy units cost per bit
WEIGHTS = 15 # max weight
NODES = 6 # num of nodes in network including sink
MAX_COST = 1000
TRANSMITION_COST = 10
ITERATIONS = 5
# *****************************************************************************
# setup of the neural network model's layers initialisation
class Model_Node(Model):
    # initialise model using the tensorflow recommended layers

    grads = []
    num_of_vars = 0
    node_id = None

    def __init__(self, nid):
        super(Model_Node, self).__init__()
        self.conv1 = Conv2D(32, 3, activation='relu')
        self.flatten = Flatten()
        self.d1 = Dense(128, activation='relu')
        self.d2 = Dense(10)
        self.num_of_vars = -1
        self.node_id = nid
        # Initialize the optimizer here to ensure it's aware of the model's variables
        self.optimizer = tf.keras.optimizers.SGD(learning_rate=ALPHA)

    def call(self, x):
        x = self.conv1(x)
        x = self.flatten(x)
        x = self.d1(x)
        return self.d2(x)
    def __getitem__(self, item): return self
    # initialising loss computatuin function
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True)

    # # initialising stochastic gradient descent function
    # optimizer = tf.keras.optimizers.SGD(learning_rate=ALPHA)

    # setting up the metrics collection tensors
    train_loss = tf.keras.metrics.Mean(name='train_loss'+str(node_id))
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy'+str(node_id))

    test_loss = tf.keras.metrics.Mean(name='test_loss'+str(node_id))
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='test_accuracy'+str(node_id))

    # model train function
    @tf.function
    def train_step(self, images, labels, glo=False, g=None):
        # if glo=true then it updates the global model using the gradient
        # given in argument g
        if glo:
            # self.optimizer.apply_gradients(zip(g, self.trainable_variables),experimental_aggregate_gradients=True)
            self.optimizer.apply_gradients(zip(g, self.trainable_variables))
            return
        with tf.GradientTape() as tape:
            predictions = self(images, training=True)
            loss = self.loss_object(labels, predictions)
        gradients = tape.gradient(loss, self.trainable_variables);
        if self.num_of_vars < 0: self.num_of_vars = len(self.trainable_variables)
        # applies training gradients using stochastic gradient descend
        # self.optimizer.apply_gradients(zip(gradients,self.trainable_variables),experimental_aggregate_gradients=False)
        self.optimizer.apply_gradients(zip(gradients,
                                           self.trainable_variables))
        # calculates loss and accuracy
        self.train_loss(loss)
        self.train_accuracy(labels, predictions)
        x = self.num_of_vars
        # x=self.num
        self.grads = gradients[-x:]
        # returns the gradient
        return self.grads

    # model predict function
    @tf.function
    def test_step(self, images, labels):
        # predicts the label
        predictions = self(images, training=False)
        t_loss = self.loss_object(labels, predictions)
        # calculates loss and accuracy
        self.test_loss(t_loss)
        self.test_accuracy(labels, predictions)

# *****************************************************************************

# function for averaging the gradients
def avgs(matrices = []):
    segmentIDs = [0] * len(matrices)
    return tf.math.segment_mean(matrices, segmentIDs)

# function returning the shortest paths for each node
def shortest_paths():
    paths = []
    for i in range(1, NODES):
        path_cost = MAX_COST
        path = None
        for p in fwd_table[i]:
            cost = len(p) * TRANSMITION_COST
            for hop in p: cost += edges[hop]
            if cost < path_cost: path_cost, path = cost, p
        paths.append((path, path_cost))
    return paths

# *****************************************************************************

# array of nodes in network
nodes = [0, 1, 2, 3, 4, 5]
# dictionary of edges in the graph and their weights
edges = {'a': 0, 'b': 0, 'c': 0, 'd': 0, 'e': 0,
         'f': 0, 'g': 0, 'h': 0, 'i': 0}
# forwarding table containing all routing paths
fwd_table = [None, ['a', 'cb'], ['b', 'ca'],
             ['da', 'dcb', 'iea', 'iecb', 'ifb', 'ifca'],
             ['ea', 'ecb', 'fb', 'fca'],
             ['gb', 'gca', 'hfb', 'hfca', 'hea', 'hecb']]

# *****************************************************************************

start_time =  datetime.now().strftime("%H:%M:%S")

# sim run
mnist = tf.keras.datasets.mnist
print("loading dataset...")
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
x_train = x_train[..., tf.newaxis].astype("float32")
x_test = x_test[..., tf.newaxis].astype("float32")
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(5)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)

init = None
nds = [[], [], [], [], []]
i = 0
for img, lbl in train_ds:
        if i == 0: init = (img, lbl); i += 1; continue
        nds[i%5].append((img, lbl))
        i += 1
print("dataset split")
# *****************************************************************************
print("initialiasing nodes")
nodes = []
for i in range(0, NODES):
    nodes.append(Model_Node(i)); print("node", i, "initialised")
    nodes[-1].train_loss.reset_state()
    nodes[-1].train_accuracy.reset_state()
    nodes[-1].test_loss.reset_state()
    nodes[-1].test_accuracy.reset_state()
# *****************************************************************************
j = 0
grad, acc, bits = [], [], []
gs = [[], [], [], [], [], []]
# initial train of all nodes
for i in range(0, NODES):
    nodes[i].train_step(init[0], init[1])
    nodes[i].set_weights(nodes[0].get_weights())
# main training and testing loop
for i in range(0, len(nds[0])):
    print("Iteration: ", i)
    # train the local models
    for j in range(1, NODES):
        print("training local model ", j)
        g = nodes[j].train_step(nds[j-1][i][0], nds[j-1][i][1])
        # save gradients in list
        for k in range(0, 6): gs[k].append(g[k])
    print("computing avgs")
    for j in range(0, 6): grad.append(avgs(gs[j])[0])
    print("avgs computed")
    # update the global model
    nodes[0].train_step(None, None, glo=True, g=grad)
    print("global model updated")
    # update local models on the updated global
    for j in range(1, NODES): nodes[j].set_weights(nodes[0].get_weights())
    # test updated global model
    for ti, tl in test_ds: nodes[0].test_step(ti, tl)
    # gather metrics for graphs
    acc.append(nodes[0].test_accuracy.result() * 100)
    print("Accuracy: ", acc[-1])
    size = 0
    # for g in grad: size += re.total_size(g)
    for g in grad: size += asizeof.asizeof(g)
    bits.append(size)
    print("Bits: ", size)
    # reset variables for next iteration
    grad = []
    gs = [[], [], [], [], [], []]
    nodes[0].test_loss.reset_state()
    nodes[0].test_accuracy.reset_state()
# run another last iteration to account for the leftover records
for i in range(2, NODES):
    g = nodes[i].train_step(nds[i-1][-1][0], nds[i-1][-1][1])
    for k in range(0, 6): gs[k].append(g[k])
for j in range(0, 6): grad.append(avgs(gs[j])[0])
# final update of the global model
nodes[0].train_step(None, None, glo=True, g=grad)
# test updated global model
for ti, tl in test_ds: nodes[0].test_step(ti, tl)
# gather metrics for graphs
acc.append(nodes[0].test_accuracy.result() * 100)
print("Accuracy: ", acc[-1])
size = 0
# for g in grad: size += re.total_size(g)
for g in grad: size += asizeof.asizeof(g)
bits.append(size)
print("Bits: ", size)
# *****************************************************************************
# plot and save graphs to file
print("plotting figures")
plt.figure(1, figsize=(20,20))
plt.title('FL Accuracy', fontsize=25)
plt.plot(range(0, len(acc)), acc, alpha=0.8)
plt.savefig("baseline_acc.png", bbox_inches='tight')

plt.figure(2, figsize=(20,20))
plt.title('FL size', fontsize=25)
plt.plot(range(0, len(bits)), bits, alpha=0.8)
plt.savefig("baseline_size.png", bbox_inches='tight')


print("script finished running")
print("start time: ", start_time, " --------- end time:",
      datetime.now().strftime("%H:%M:%S"))


Tensorflow version =  2.17.0
loading dataset...
dataset split
initialiasing nodes
node 0 initialised
node 1 initialised
node 2 initialised
node 3 initialised
node 4 initialised
node 5 initialised
Iteration:  0
training local model  1
training local model  2
training local model  3
training local model  4
training local model  5
computing avgs
avgs computed
global model updated


KeyboardInterrupt: 

In [46]:
from datetime import datetime
import random as ran
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras import Model
import sys, math

# *****************************************************************************

# constant variables
ALPHA = 0.1 # learning rate
ENERGY = 2 # energy units cost per bit
NODES = 6 # num of nodes in network including sink
SGD_STEPS = 5 # num of stochastic gradient descent steps in between updates
SPARSIFICATION_THRESHOLD = 0.001
# *****************************************************************************
# array of nodes in network
paths = {3 : "310", 4: "420", 5: "520", 1 : "10", 2 : "20"}
costs = {0: 0, 1: 3, 2: 1, 3: 0.5, 4: 2 , 5:1.5}
# *****************************************************************************
# setup of the neural network model's layers initialisation

class Model_Node(Model):
    # initialise model using the tensorflow recommended layers

    grads = []
    num_of_vars = 0
    node_id = None

    def __init__(self, nid):
        super(Model_Node, self).__init__()
        self.W = tf.Variable(tf.ones([784, 10]), name="weight")
        self.b = tf.Variable(tf.zeros([10]), name="bias")
        self.num_of_vars = -1
        self.node_id = nid
        self.optimizer = tf.keras.optimizers.SGD(learning_rate=ALPHA)

    def call(self, x):
        return tf.nn.softmax(tf.matmul(x, self.W) + self.b)

    def __getitem__(self, item): return self

    # initialising stochastic gradient descent function
    # optimizer = tf.keras.optimizers.SGD(learning_rate=ALPHA)

    # setting up the metrics collection tensors
    train_loss = tf.keras.metrics.Mean(name='train_loss'+str(node_id))
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy'+str(node_id))

    test_loss = tf.keras.metrics.Mean(name='test_loss'+str(node_id))
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='test_accuracy'+str(node_id))

    def loss_func(self, y_true, y_pred):
        y_true = tf.one_hot(y_true, depth=10)
        y_pred = tf.clip_by_value(y_pred, 1e-9, 1.)
        return tf.reduce_mean(-tf.reduce_sum(y_true * tf.math.log(y_pred),1))


    def accuracy(self, y_true, y_pred):
        correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.cast(y_true, tf.int64))
        return tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # model train function
    @tf.function
    def train_step(self, images, labels, glo=False, g=None):
        # if glo=true then it updates the global model using the gradient
        # given in argument g
        if glo:
            # self.optimizer.apply_gradients(zip(g, self.trainable_variables),
            #                                experimental_aggregate_gradients=True)
            self.optimizer.apply_gradients(zip(g, self.trainable_variables))
            return
        with tf.GradientTape() as tape:
            predictions = self(images, training=True)
            tloss = self.loss_func(labels, predictions)
        gradients = tape.gradient(tloss, self.trainable_variables);
        if self.num_of_vars < 0: self.num_of_vars = len(self.trainable_variables)
        # applies training gradients using stochastic gradient descend
        # self.optimizer.apply_gradients(zip(gradients,
        #                                    self.trainable_variables),
        #                                experimental_aggregate_gradients=False)
        self.optimizer.apply_gradients(zip(gradients,
                                           self.trainable_variables))
        # calculates loss and accuracy
        self.train_loss(tloss)
        self.train_accuracy(labels, predictions)
        x = self.num_of_vars
        self.grads = gradients[-x:]
        # returns the gradient
        return self.grads

    # model predict function
    @tf.function
    def test_step(self, images, labels):
        # predicts the label
        predictions = self(images, training=False)
        t_loss = self.loss(labels, predictions)
        # calculates loss and accuracy
        self.test_loss(t_loss)
        self.test_accuracy(labels, predictions)

# *****************************************************************************
# function for averaging the gradients
def avgs(matrices = []):
    segmentIDs = [0] * len(matrices)
    return tf.math.segment_mean(matrices, segmentIDs)

# function returning the enegry cost
def compute_transmission_cost(node, bt):
    cost = 0
    for c in paths[node]:
        snr = ran.randint(10, 30)
        cost += ((costs[int(c)] * ENERGY * 8 * bt) / math.log2(1 + snr))
    return cost
# *****************************************************************************
start_time =  datetime.now().strftime("%H:%M:%S")

# sim run
mnist = tf.keras.datasets.mnist
print("loading dataset...")
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Converting data to float32
x_train, x_test = np.array(x_train, np.float32), np.array(x_test, np.float32)
# Flatten images to 1-D vector of 784 features (28*28).
x_train, x_test = x_train.reshape(x_train.shape[0], -1), x_test.reshape(x_test.shape[0], -1)
# Normalize images value from [0, 255] to [0, 1].
x_train, x_test = x_train / 255., x_test / 255.
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(5)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(5)

init = None
nds = [[], [], [], [], []]
i = 0
for img, lbl in train_ds:
        if i == 0: init = (img, lbl); i += 1; continue
        nds[i%5].append((img, lbl))
        i += 1
print("dataset split")
# *****************************************************************************
print("initialiasing nodes")
nodes = []
for i in range(0, NODES):
    nodes.append(Model_Node(i)); print("node", i, "initialised")
    nodes[-1].train_loss.reset_state()
    nodes[-1].train_accuracy.reset_state()
    nodes[-1].test_loss.reset_state()
    nodes[-1].test_accuracy.reset_state()
# *****************************************************************************
j, ecost = 0, 0
ecosts, acc, los, bits = [], [], [], []
gs = [[], [], [], [], [], []]
# initial train of all nodes
for i in range(0, NODES):
    nodes[i].train_step(init[0], init[1])
    if i == 0: continue
    nodes[i].set_weights(nodes[0].get_weights())
# main training and testing loop
for i in range(0, len(nds[0])):
    print("Iteration: ", i)
    # train the local models
    for j in range(1, NODES):
        print("training local model ", j)
        g = nodes[j].train_step(nds[j-1][i][0], nds[j-1][i][1])
    idiv = i % SGD_STEPS # checks how many steps of SGD have been run
    if idiv == (SGD_STEPS - 1):
        ws = nodes[0].get_weights()
        nd = [0] * (NODES - 1)
        # sparsify gradient (client side)
        for j in range(1, NODES):
            g = nodes[j].get_weights()
            totsize, size = 0, 0
            for k in range(0, 2): g[k] = ws[k] - g[k]
            for gg, k in zip(g, range(0, 2)):
                totsize += gg.nbytes
                gg[np.abs(gg) < SPARSIFICATION_THRESHOLD] = 0
                ind = gg.nonzero()
                compressed = gg[ind]
                size += compressed.nbytes + sys.getsizeof(ind)
                gs[k].append((ind, compressed))
            percentage = (size / totsize) * 100
            nd[j-1] = size
            print("Node ", j, ": sparsified and sent ",
                  percentage, "% of original gradient")
        # average differences (server side)
        for j in range(0, 2):
            l = len(gs[j])
            if l == 0: ws[j] = np.zeros(ws[j].shape, dtype="float32"); continue
            s = np.zeros(ws[j].shape, dtype="float32")
            for g in gs[j]:
                decompressed = np.zeros(ws[j].shape, dtype="float32")
                decompressed[g[0]] = g[1]
                s += decompressed
            ws[j] = s / l
        # update global model
        # nodes[0].optimizer.apply_gradients(zip(ws,nodes[0].trainable_variables),
        #                                    experimental_aggregate_gradients=True)
        nodes[0].optimizer.apply_gradients(zip(ws,nodes[0].trainable_variables))
        # gather metrics for graphs
        for ti, tl in test_ds: nodes[0].test_step(ti, tl)
        acc.append(nodes[0].test_accuracy.result() * 100)
        los.append(nodes[0].test_loss.result())
        print("Accuracy: ", acc[-1])
        print("Loss: ", los[-1])
        for j in range(1, NODES):
            ecost += compute_transmission_cost(j, nd[j-1])
        ecosts.append(ecost)
        print("Energy cost: ", ecost)
        size = 0
        for n in nd: size += n * 8
        bits.append(size)
        print("Bits: ", bits[-1])
        gs = [[], [], [], [], [], []]
        for j in range(1, NODES):
            nodes[j].set_weights(nodes[0].get_weights())
        nodes[0].test_loss.reset_state()
        nodes[0].test_accuracy.reset_state()
# run another last iteration to account for the leftover records
print("Iteration: ", i+1)
print("Remainder: ", i % SGD_STEPS)
ws = nodes[0].get_weights()
nd = [0] * (NODES - 1)
# sparsify gradient
# sparsify gradient (client side)
for j in range(1, NODES):
    g = nodes[j].get_weights()
    totsize, size = 0, 0
    for k in range(0, 2): g[k] = ws[k] - g[k]
    for gg, k in zip(g, range(0, 2)):
        totsize += gg.nbytes
        gg[np.abs(gg) < SPARSIFICATION_THRESHOLD] = 0
        ind = gg.nonzero()
        compressed = gg[ind]
        size += compressed.nbytes + sys.getsizeof(ind)
        gs[k].append((ind, compressed))
    percentage = (size / totsize) * 100
    nd[j-1] = size
    print("Node ", j, ": sparsified and sent ",
          percentage, "% of original gradient")
# average differences (server side)
for j in range(0, 2):
    l = len(gs[j])
    if l == 0: ws[j] = np.zeros(ws[j].shape, dtype="float32"); continue
    s = np.zeros(ws[j].shape, dtype="float32")
    for g in gs[j]:
        decompressed = np.zeros(ws[j].shape, dtype="float32")
        decompressed[g[0]] = g[1]
        s += decompressed
    ws[j] = s / l
# update global model
nodes[0].optimizer.apply_gradients(zip(ws,nodes[0].trainable_variables),
                                   experimental_aggregate_gradients=True)
# gather metrics for graphs
for ti, tl in test_ds: nodes[0].test_step(ti, tl)
acc.append(nodes[0].test_accuracy.result() * 100)
los.append(nodes[0].test_loss.result())
print("Accuracy: ", acc[-1])
print("Loss: ", los[-1])
for j in range(1, NODES):
    ecost += compute_transmission_cost(j, nd[j-1])
ecosts.append(ecost)
print("Energy cost: ", ecost)
size = 0
for n in nd: size += n * 8
bits.append(size)
print("Bits: ", bits[-1])
# *****************************************************************************
print("script finished running")
print("start time: ", start_time, " --------- end time:",
      datetime.now().strftime("%H:%M:%S"))

loading dataset...
dataset split
initialiasing nodes
node 0 initialised
node 1 initialised
node 2 initialised
node 3 initialised
node 4 initialised
node 5 initialised


ValueError: in user code:

    File "<ipython-input-46-2d7c9d85a91c>", line 86, in train_step  *
        self.optimizer.apply_gradients(zip(gradients,
    File "/usr/local/lib/python3.10/dist-packages/keras/src/optimizers/base_optimizer.py", line 281, in apply_gradients  **
        grads, trainable_variables = zip(*grads_and_vars)

    ValueError: not enough values to unpack (expected 2, got 0)


In [47]:
############  CONSTANT T SPARSIFICATION   #############
from datetime import datetime
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras import Model
import sys

# *****************************************************************************

# constant variables
ALPHA = 0.1 # learning rate
ENERGY = 2 # energy units cost per bit
NODES = 6 # num of nodes in network including sink
SGD_STEPS = 5 # num of stochastic gradient descent steps in between updates
T_PERCENTAGE = 0.04
# *****************************************************************************
# array of nodes in network
paths = {3 : "310", 4: "420", 5: "520", 1 : "10", 2 : "20"}
costs = {0: 0, 1: 3, 2: 1, 3: 0.5, 4: 2 , 5:1.5}
# *****************************************************************************
# setup of the neural network model's layers initialisation
class Model_Node(Model):
    # initialise model using the tensorflow recommended layers

    grads = []
    num_of_vars = 0
    node_id = None

    def __init__(self, nid):
        super(Model_Node, self).__init__()
        self.conv1 = Conv2D(32, 3, activation='relu')
        self.flatten = Flatten()
        self.d1 = Dense(128, activation='relu')
        self.d2 = Dense(10)
        self.num_of_vars = -1
        self.node_id = nid
        self.optimizer = tf.keras.optimizers.SGD(learning_rate=ALPHA)


    def call(self, x):
        x = self.conv1(x)
        x = self.flatten(x)
        x = self.d1(x)
        return self.d2(x)
    def __getitem__(self, item): return self
    # initialising loss computatuin function
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True)

    # initialising stochastic gradient descent function
    # optimizer = tf.keras.optimizers.SGD(learning_rate=ALPHA)

    # setting up the metrics collection tensors
    train_loss = tf.keras.metrics.Mean(name='train_loss'+str(node_id))
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy'+str(node_id))

    test_loss = tf.keras.metrics.Mean(name='test_loss'+str(node_id))
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='test_accuracy'+str(node_id))

    # model train function
    @tf.function
    def train_step(self, images, labels, glo=False, g=None):
        # if glo=true then it updates the global model using the gradient
        # given in argument g
        if glo:
            # self.optimizer.apply_gradients(zip(g, self.trainable_variables),experimental_aggregate_gradients=True)
            self.optimizer.apply_gradients(zip(g, self.trainable_variables))
            return
        with tf.GradientTape() as tape:
            predictions = self(images, training=True)
            loss = self.loss_object(labels, predictions)
        gradients = tape.gradient(loss, self.trainable_variables);
        if self.num_of_vars < 0: self.num_of_vars = len(self.trainable_variables)
        # applies training gradients using stochastic gradient descend
        # self.optimizer.apply_gradients(zip(gradients,self.trainable_variables), experimental_aggregate_gradients=False)
        self.optimizer.apply_gradients(zip(gradients,self.trainable_variables))
        # calculates loss and accuracy
        self.train_loss(loss)
        self.train_accuracy(labels, predictions)
        x = self.num_of_vars
        self.grads = gradients[-x:]
        # returns the gradient
        return self.grads

    # model predict function
    @tf.function
    def test_step(self, images, labels):
        # predicts the label
        predictions = self(images, training=False)
        t_loss = self.loss_object(labels, predictions)
        # calculates loss and accuracy
        self.test_loss(t_loss)
        self.test_accuracy(labels, predictions)

# *****************************************************************************
# function for averaging the gradients
def avgs(matrices = []):
    segmentIDs = [0] * len(matrices)
    return tf.math.segment_mean(matrices, segmentIDs)

# function returning the enegry cost
def compute_transmission_cost(node):
    cost = 0
    for c in paths[node]: cost += (costs[int(c)] * ENERGY)
    return cost
# *****************************************************************************
start_time =  datetime.now().strftime("%H:%M:%S")

# sim run
mnist = tf.keras.datasets.mnist
print("loading dataset...")
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
x_train = x_train[..., tf.newaxis].astype("float32")
x_test = x_test[..., tf.newaxis].astype("float32")
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(5)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)

init = None
nds = [[], [], [], [], []]
i = 0
for img, lbl in train_ds:
        if i == 0: init = (img, lbl); i += 1; continue
        nds[i%5].append((img, lbl))
        i += 1
print("dataset split")
# *****************************************************************************
print("initialiasing nodes")
nodes = []
for i in range(0, NODES):
    nodes.append(Model_Node(i)); print("node", i, "initialised")
    nodes[-1].train_loss.reset_state()
    nodes[-1].train_accuracy.reset_state()
    nodes[-1].test_loss.reset_state()
    nodes[-1].test_accuracy.reset_state()
# *****************************************************************************
j, ecost = 0, 0
ecosts, acc, los, bits = [], [], [], []
gs = []
# initial train of all nodes
for i in range(0, NODES):
    nodes[i].train_step(init[0], init[1])
    if i == 0: continue
    nodes[i].set_weights(nodes[0].get_weights())
gsizes = []
totg = 0 #total number of gradients in model
for g in nodes[0].get_weights(): gsizes.append(g.size); totg += g.size
maxg = int(T_PERCENTAGE * totg) # number of max values to send
# main training and testing loop
for i in range(0, len(nds[0])):
    print("Iteration: ", i)
    # train the local models
    for j in range(1, NODES):
        print("training local model ", j)
        g = nodes[j].train_step(nds[j-1][i][0], nds[j-1][i][1])
    idiv = i % SGD_STEPS # checks how many steps of SGD have been run
    if idiv == (SGD_STEPS - 1):
        ws = nodes[0].get_weights()
        nd = [0] * (NODES - 1)
        # t-compress gradient (client side)
        for j in range(1, NODES):
            print("Starting gradient compression for node", j, "at",
                  datetime.now().strftime("%H:%M:%S"))
            g = nodes[j].get_weights()
            totsize, size = 0, 0
            for k in range(0, 6): g[k] = ws[k] - g[k]
            gflat = np.concatenate(g, axis=None)
            totsize = gflat.nbytes
            inds = np.argpartition(abs(gflat), -maxg)[-maxg:]
            size = np.array(inds).nbytes + np.array(gflat[inds]).nbytes
            gs.append([inds, gflat[inds]])
            percentage = (size / totsize) * 100
            nd[j-1] = size
            print("Node ", j, ": compressed and sent ",
                  percentage, "% of original gradient")
            print("Compression finished at", datetime.now().strftime("%H:%M:%S"))
        # average differences (server side)
        for j in range(0, 6): ws[j] = np.zeros(ws[j].shape, dtype="float32")
        d = {}
        for e in gs:
            for gi, gv in zip(e[0], e[1]): d.setdefault(gi, []).append(gv)
        for gi, gv in d.items():
            for k in range(0, 6):
                if gi < gsizes[k]:
                    ws[k][np.unravel_index(gi, ws[k].shape)] = np.mean(gv)
                    break
                gi -= gsizes[k]
        # update global model
        # nodes[0].optimizer.apply_gradients(zip(ws,nodes[0].trainable_variables), experimental_aggregate_gradients=True)
        nodes[0].optimizer.apply_gradients(zip(ws,nodes[0].trainable_variables))

        # gather metrics for graphs
        for ti, tl in test_ds: nodes[0].test_step(ti, tl)
        acc.append(nodes[0].test_accuracy.result() * 100)
        los.append(nodes[0].test_loss.result())
        print("Accuracy: ", acc[-1])
        print("Loss: ", los[-1])
        for j in range(1, 6):
            ct = compute_transmission_cost(j)
            ecost += ct * nd[j-1] * 8
        ecosts.append(ecost)
        print("Energy cost: ", ecost)
        size = 0
        for n in nd: size += n * 8
        bits.append(size)
        print("Bits: ", bits[-1])
        gs = []
        for j in range(1, NODES):
            nodes[j].set_weights(nodes[0].get_weights())
        nodes[0].test_loss.reset_state()
        nodes[0].test_accuracy.reset_state()
# run another last iteration to account for the leftover records
print("Iteration: ", i+1)
print("Remainder: ", i % SGD_STEPS)
ws = nodes[0].get_weights()
nd = [0] * (NODES - 1)
# t-compress gradient (client side)
for j in range(1, NODES):
    print("Starting gradient compression for node", j, "at",
          datetime.now().strftime("%H:%M:%S"))
    g = nodes[j].get_weights()
    totsize, size = 0, 0
    for k in range(0, 6): g[k] = ws[k] - g[k]
    gflat = np.concatenate(g, axis=None)
    totsize = gflat.nbytes
    inds = np.argpartition(abs(gflat), -maxg)[-maxg:]
    size = np.array(inds).nbytes + np.array(gflat[inds]).nbytes
    gs.append([inds, gflat[inds]])
    percentage = (size / totsize) * 100
    nd[j-1] = size
    print("Node ", j, ": compressed and sent ",
          percentage, "% of original gradient")
    print("Compression finished at", datetime.now().strftime("%H:%M:%S"))
# average differences (server side)
for j in range(0, 6): ws[j] = np.zeros(ws[j].shape, dtype="float32")
d = {}
for e in gs:
    for gi, gv in zip(e[0], e[1]): d.setdefault(gi, []).append(gv)
for gi, gv in d.items():
    for k in range(0, 6):
        if gi < gsizes[k]:
            ws[k][np.unravel_index(gi, ws[k].shape)] = np.mean(gv)
            break
        gi -= gsizes[k]
# update global model
# nodes[0].optimizer.apply_gradients(zip(ws,nodes[0].trainable_variables),
#                                    experimental_aggregate_gradients=True)
nodes[0].optimizer.apply_gradients(zip(ws,nodes[0].trainable_variables))

# gather metrics for graphs
for ti, tl in test_ds: nodes[0].test_step(ti, tl)
acc.append(nodes[0].test_accuracy.result() * 100)
los.append(nodes[0].test_loss.result())
print("Accuracy: ", acc[-1])
print("Loss: ", los[-1])
for j in range(1, 6):
    ct = compute_transmission_cost(j)
    ecost += ct * nd[j-1] * 8
ecosts.append(ecost)
print("Energy cost: ", ecost)
size = 0
for n in nd: size += n * 8
bits.append(size)
print("Bits: ", bits[-1])
# *****************************************************************************
# plot and save graphs to file
print("plotting figures")
plt.figure(1, figsize=(20,20))
plt.title('FL Accuracy', fontsize=25)
plt.plot(range(0, len(acc)), acc, alpha=0.8)
pngname = "baseline_acc_tcompressed.png"
print(pngname)
plt.savefig(pngname, bbox_inches='tight')

plt.figure(2, figsize=(20,20))
plt.title('FL size', fontsize=25)
plt.plot(range(0, len(bits)), bits, alpha=0.8)
pngname = "baseline_size_tcompressed.png"
print(pngname)
plt.savefig(pngname, bbox_inches='tight')

plt.figure(3, figsize=(20,20))
plt.title('FL Energy Consupmtioon', fontsize=25)
plt.plot(range(0, len(ecosts)), ecosts, alpha=0.8)
pngname = "baseline_energy_tcompressed.png"
print(pngname)
plt.savefig(pngname, bbox_inches='tight')
# *****************************************************************************
print("script finished running")
print("start time: ", start_time, " --------- end time:",
      datetime.now().strftime("%H:%M:%S"))

loading dataset...
dataset split
initialiasing nodes
node 0 initialised
node 1 initialised
node 2 initialised
node 3 initialised
node 4 initialised
node 5 initialised
Iteration:  0
training local model  1
training local model  2
training local model  3
training local model  4
training local model  5
Iteration:  1
training local model  1
training local model  2
training local model  3
training local model  4
training local model  5
Iteration:  2
training local model  1
training local model  2
training local model  3
training local model  4
training local model  5
Iteration:  3
training local model  1
training local model  2
training local model  3
training local model  4
training local model  5
Iteration:  4
training local model  1
training local model  2
training local model  3
training local model  4
training local model  5
Starting gradient compression for node 1 at 19:36:07
Node  1 : compressed and sent  11.999961019752158 % of original gradient
Compression finished at 19:36:07
Star

KeyboardInterrupt: 

In [51]:
############  CAT SPARSIFICATION   #############
from datetime import datetime
import tensorflow as tf
import numpy as np
from numpy import linalg as la
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras import Model
import sys
import random as rand

# *****************************************************************************

# constant variables
ALPHA = 0.1 # learning rate
ENERGY = 2 # energy units cost per bit
NODES = 6 # num of nodes in network including sink
SGD_STEPS = 5 # num of stochastic gradient descent steps in between updates
T_PERCENTAGE = 0.04
# *****************************************************************************
# array of nodes in network
paths = {3 : "310", 4: "420", 5: "520", 1 : "10", 2 : "20"}
costs = {0: 0, 1: 3, 2: 1, 3: 0.5, 4: 2 , 5:1.5}
# *****************************************************************************
# setup of the neural network model's layers initialisation
class Model_Node(Model):
    # initialise model using the tensorflow recommended layers

    grads = []
    num_of_vars = 0
    node_id = None

    def __init__(self, nid):
        super(Model_Node, self).__init__()
        self.conv1 = Conv2D(32, 3, activation='relu')
        self.flatten = Flatten()
        self.d1 = Dense(128, activation='relu')
        self.d2 = Dense(10)
        self.num_of_vars = -1
        self.node_id = nid
        self.optimizer = tf.keras.optimizers.SGD(learning_rate=ALPHA)

    def call(self, x):
        x = self.conv1(x)
        x = self.flatten(x)
        x = self.d1(x)
        return self.d2(x)
    def __getitem__(self, item): return self
    # initialising loss computatuin function
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True)

    # initialising stochastic gradient descent function
    # optimizer = tf.keras.optimizers.SGD(learning_rate=ALPHA)

    # setting up the metrics collection tensors
    train_loss = tf.keras.metrics.Mean(name='train_loss'+str(node_id))
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy'+str(node_id))

    test_loss = tf.keras.metrics.Mean(name='test_loss'+str(node_id))
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='test_accuracy'+str(node_id))

    # model train function
    @tf.function
    def train_step(self, images, labels, glo=False, g=None):
        # if glo=true then it updates the global model using the gradient
        # given in argument g
        if glo:
            # self.optimizer.apply_gradients(zip(g, self.trainable_variables),
            #                                experimental_aggregate_gradients=True)
            self.optimizer.apply_gradients(zip(g, self.trainable_variables))
            return
        with tf.GradientTape() as tape:
            predictions = self(images, training=True)
            loss = self.loss_object(labels, predictions)
        gradients = tape.gradient(loss, self.trainable_variables);
        if self.num_of_vars < 0: self.num_of_vars = len(self.trainable_variables)
        # applies training gradients using stochastic gradient descend
        # self.optimizer.apply_gradients(zip(gradients,
        #                                    self.trainable_variables),
        #                                experimental_aggregate_gradients=False)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        # calculates loss and accuracy
        self.train_loss(loss)
        self.train_accuracy(labels, predictions)
        x = self.num_of_vars
        self.grads = gradients[-x:]
        # returns the gradient
        return self.grads

    # model predict function
    @tf.function
    def test_step(self, images, labels):
        # predicts the label
        predictions = self(images, training=False)
        t_loss = self.loss_object(labels, predictions)
        # calculates loss and accuracy
        self.test_loss(t_loss)
        self.test_accuracy(labels, predictions)

# *****************************************************************************
# function for averaging the gradients
def avgs(matrices = []):
    segmentIDs = [0] * len(matrices)
    return tf.math.segment_mean(matrices, segmentIDs)

# function returning the enegry cost
def compute_transmission_cost(node):
    cost = 0
    for c in paths[node]: cost += (costs[int(c)] * ENERGY)
    return cost
# *****************************************************************************
start_time =  datetime.now().strftime("%H:%M:%S")

# sim run
mnist = tf.keras.datasets.mnist
print("loading dataset...")
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
x_train = x_train[..., tf.newaxis].astype("float32")
x_test = x_test[..., tf.newaxis].astype("float32")
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(1)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)

init = None
nds = [[], [], [], [], []]
i = 0
for img, lbl in train_ds:
        if i == 0: init = (img, lbl); i += 1
        for j in range(0, 5):
            nds[j].append((img, lbl))
            nds[j].append((img, lbl))
            nds[j].append((img, lbl))
for i in range(0, 5): rand.shuffle(nds[i])
# print(len(nds[0])); sys.exit(0)
print("dataset split")
# *****************************************************************************
print("initialiasing nodes")
nodes = []
for i in range(0, NODES):
    nodes.append(Model_Node(i)); print("node", i, "initialised")
    nodes[-1].train_loss.reset_state()
    nodes[-1].train_accuracy.reset_state()
    nodes[-1].test_loss.reset_state()
    nodes[-1].test_accuracy.reset_state()
# *****************************************************************************
j, ecost = 0, 0
ecosts, acc, los, bits = [], [], [], []
gs = []
# initial train of all nodes
for i in range(0, NODES):
    nodes[i].train_step(init[0], init[1])
    if i == 0: continue
    nodes[i].set_weights(nodes[0].get_weights())
gsizes = []
totg = 0 #total number of gradients in model
for g in nodes[0].get_weights(): gsizes.append(g.size); totg += g.size
maxg = int(T_PERCENTAGE * totg) # number of max values to send
# main training and testing loop
for i in range(0, len(nds[0])):
    print("Iteration: ", i)
    # train the local models
    for j in range(1, NODES):
        print("training local model ", j)
        g = nodes[j].train_step(nds[j-1][i][0], nds[j-1][i][1])
    idiv = i % SGD_STEPS # checks how many steps of SGD have been run
    if idiv == (SGD_STEPS - 1):
        ws = nodes[0].get_weights()
        nd = [0] * (NODES - 1)
        # t-compress gradient (client side)
        for j in range(1, NODES):
            print("Starting gradient compression for node", j, "at",
                  datetime.now().strftime("%H:%M:%S"))
            g = nodes[j].get_weights()
            totsize, size = 0, 0
            for k in range(0, 6): g[k] = ws[k] - g[k]
            gflat = np.concatenate(g, axis=None)
            totsize = gflat.nbytes
            inds = np.argsort(abs(gflat))
            cc = compute_transmission_cost(j) * 8
            minval, minind = 0, 0
            for k in range(1, len(inds)+1):
                compressed = gflat[inds[-k:]]
                calc = (la.norm(compressed)**2) / (la.norm(gflat)**2)
                calc /= (cc * compressed.size)
                if calc > minval: minval = calc; minind = k; continue
                if calc < minval: break
            size = np.array(inds[-minind:]).nbytes + np.array(
                gflat[inds[-minind:]]).nbytes
            gs.append([inds[-minind:], gflat[inds[-minind:]]])
            percentage = (size / totsize) * 100
            nd[j-1] = size
            print("Node ", j, ": compressed and sent ",
                  percentage, "% of original gradient using", k-1, "gradients")
            print("Compression finished at", datetime.now().strftime("%H:%M:%S"))
        # average differences (server side)
        for j in range(0, 6): ws[j] = np.zeros(ws[j].shape, dtype="float32")
        d = {}
        for e in gs:
            for gi, gv in zip(e[0], e[1]): d.setdefault(gi, []).append(gv)
        for gi, gv in d.items():
            for k in range(0, 6):
                if gi < gsizes[k]:
                    ws[k][np.unravel_index(gi, ws[k].shape)] = np.mean(gv)
                    break
                gi -= gsizes[k]
        # update global model
        # nodes[0].optimizer.apply_gradients(zip(ws,nodes[0].trainable_variables),
        #                                    experimental_aggregate_gradients=True)
        nodes[0].optimizer.apply_gradients(zip(ws,nodes[0].trainable_variables))
        # gather metrics for graphs
        for ti, tl in test_ds: nodes[0].test_step(ti, tl)
        acc.append(nodes[0].test_accuracy.result() * 100)
        los.append(nodes[0].test_loss.result())
        print("Accuracy: ", acc[-1])
        print("Loss:", los[-1])
        for j in range(1, 6):
            ct = compute_transmission_cost(j)
            ecost += ct * nd[j-1] * 8
        ecosts.append(ecost)
        print("Energy cost: ", ecost)
        size = 0
        for n in nd: size += n * 8
        bits.append(size)
        print("Bits: ", bits[-1])
        gs = []
        for j in range(1, NODES): nodes[j].set_weights(nodes[0].get_weights())
        nodes[0].test_loss.reset_state()
        nodes[0].test_accuracy.reset_state()
"""# run another last iteration to account for the leftover records
print("Iteration: ", i+1)
print("Remainder: ", i % SGD_STEPS)
ws = nodes[0].get_weights()
nd = [0] * (NODES - 1)
# t-compress gradient (client side)
for j in range(1, NODES):
    print("Starting gradient compression for node", j, "at",
          datetime.now().strftime("%H:%M:%S"))
    g = nodes[j].get_weights()
    totsize, size = 0, 0
    for k in range(0, 6): g[k] = ws[k] - g[k]
    gflat = np.concatenate(g, axis=None)
    totsize = gflat.nbytes
    inds = np.argsort(abs(gflat))
    cc = compute_transmission_cost(j) * 8
    minval, minind = 0, 0
    for k in range(1, len(inds)+1):
        compressed = gflat[inds[-k:]]
        calc = (la.norm(compressed)**2) / (la.norm(gflat)**2)
        calc /= (cc * compressed.size)
        if calc > minval: minval = calc; minind = k; continue
        if calc < minval: break
    size = np.array(inds[-minind:]).nbytes + np.array(
        gflat[inds[-minind:]]).nbytes
    gs.append([inds[-minind:], gflat[inds[-minind:]]])
    percentage = (size / totsize) * 100
    nd[j-1] = size
    print("Node ", j, ": compressed and sent ",
          percentage, "% of original gradient using", k, "gradients")
    print("Compression finished at", datetime.now().strftime("%H:%M:%S"))
# average differences (server side)
for j in range(0, 6): ws[j] = np.zeros(ws[j].shape, dtype="float32")
d = {}
for e in gs:
    for gi, gv in zip(e[0], e[1]): d.setdefault(gi, []).append(gv)
for gi, gv in d.items():
    for k in range(0, 6):
        if gi < gsizes[k]:
            ws[k][np.unravel_index(gi, ws[k].shape)] = np.mean(gv)
            break
        gi -= gsizes[k]
# update global model
nodes[0].optimizer.apply_gradients(zip(ws,nodes[0].trainable_variables),
                                   experimental_aggregate_gradients=True)
"""
# gather metrics for graphs
for ti, tl in test_ds: nodes[0].test_step(ti, tl)
acc.append(nodes[0].test_accuracy.result() * 100)
los.append(nodes[0].test_loss.result())
print("Accuracy: ", acc[-1])
print("Loss:", los[-1])
for j in range(1, 6):
    ct = compute_transmission_cost(j)
    ecost += ct * nd[j-1] * 8
ecosts.append(ecost)
print("Energy cost: ", ecost)
size = 0
for n in nd: size += n * 8
bits.append(size)
print("Bits: ", bits[-1])
# *****************************************************************************
# plot and save graphs to file
print("plotting figures")
plt.figure(1, figsize=(20,20))
plt.title('FL Accuracy', fontsize=25)
plt.plot(range(0, len(acc)), acc, alpha=0.8)
pngname = "baseline_acc_tcompressed.png"
print(pngname)
plt.savefig(pngname, bbox_inches='tight')

plt.figure(2, figsize=(20,20))
plt.title('FL size', fontsize=25)
plt.plot(range(0, len(bits)), bits, alpha=0.8)
pngname = "baseline_size_tcompressed.png"
print(pngname)
plt.savefig(pngname, bbox_inches='tight')

plt.figure(3, figsize=(20,20))
plt.title('FL Energy Consupmtioon', fontsize=25)
plt.plot(range(0, len(ecosts)), ecosts, alpha=0.8)
pngname = "baseline_energy_tcompressed.png"
print(pngname)
plt.savefig(pngname, bbox_inches='tight')
# *****************************************************************************
print("script finished running")
print("start time: ", start_time, " --------- end time:",
      datetime.now().strftime("%H:%M:%S"))

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Energy cost:  34944.0
Bits:  480
Iteration:  70
training local model  1
training local model  2
training local model  3
training local model  4
training local model  5
Iteration:  71
training local model  1
training local model  2
training local model  3
training local model  4
training local model  5
Iteration:  72
training local model  1
training local model  2
training local model  3
training local model  4
training local model  5
Iteration:  73
training local model  1
training local model  2
training local model  3
training local model  4
training local model  5
Iteration:  74
training local model  1
training local model  2
training local model  3
training local model  4
training local model  5
Starting gradient compression for node 1 at 19:42:28
Node  1 : compressed and sent  0.00010827846622830733 % of original gradient using 1 gradients
Compression finished at 19:42:28
Starting gradient compression for node 2 at 19

KeyboardInterrupt: 