In [35]:
import ipyparallel as ipp
# Number of engines to launch (one MPI rank per engine).
n = 4
# Launch the engines through the MPI launcher and connect a client to them.
rc = ipp.Cluster(engines="mpi", n=n).start_and_connect_sync()
# View spanning every engine of the client.
view = rc[:]
# Last expression of the cell: show the ids of the connected engines.
rc.ids

Starting 4 engines with <class 'ipyparallel.cluster.launcher.MPIEngineSetLauncher'>


  0%|          | 0/4 [00:00<?, ?engine/s]

[0, 1, 2, 3]

In [36]:
%%px --block
import pandas as pd
import tensorflow as tf
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from mpi4py import MPI
import copy
import sys
sys.path.append('../../../Decentralized-FL-Framework')
from communication import DecentralizedNoModelSGD
from comm_weights import flatten_weights, unflatten_weights
from network import Graph


# Seed NumPy's global RNG. This cell runs on every engine, so each rank uses the same seed.
np.random.seed(482)
# Raise pandas' display limits so wide/long frames are not truncated when shown.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

# World communicator plus shorthand handles used by the later cells.
mpi = MPI.COMM_WORLD
bcast = mpi.bcast
barrier = mpi.barrier
rank = mpi.rank
size = mpi.size
# Sanity check: each engine reports its rank and the world size.
print("MPI rank: %i/%i" % (mpi.rank, mpi.size))

[stdout:0] MPI rank: 0/4


[stdout:2] MPI rank: 2/4


[stdout:3] MPI rank: 3/4


[stdout:1] MPI rank: 1/4


## Helper Functions

In [37]:
%%px --block
# Implement Custom Loss Function
@tf.function
def consensus_loss(y_true, y_pred, z, l2):
    """Local cross-entropy plus an ``l2``-weighted cross-entropy against ``z``.

    Args:
        y_true: integer class labels for the batch (sparse encoding).
        y_pred: class probabilities predicted by the model for the batch.
        z: target class distribution for the same batch; ``train`` passes the
            predictions returned by the communicator's averaging step.
        l2: scalar weight applied to the consensus term.

    Returns:
        Scalar tensor ``local + l2 * consensus``.
    """
    sparse_ce = tf.keras.losses.SparseCategoricalCrossentropy()
    dense_ce = tf.keras.losses.CategoricalCrossentropy()

    # fit to the worker's own labels
    local_term = sparse_ce(y_true, y_pred)
    # agreement with the shared target distribution
    consensus_term = l2 * dense_ce(z, y_pred)

    return local_term + consensus_term

In [38]:
 %%px --block
# Play around with this more
def set_learning_rate(optimizer, epoch):
    """Decay the optimizer's learning rate by a factor of ``exp(-0.1)``.

    Epoch 0 keeps the initial rate; every later call multiplies the current
    rate by ``exp(-0.1)``, so the decay compounds across epochs.

    Args:
        optimizer: Keras optimizer whose learning rate is updated in place.
        epoch: zero-based index of the epoch about to run.
    """
    if epoch >= 1:
        # `learning_rate` is the canonical attribute; `lr` is only a legacy alias
        optimizer.learning_rate = optimizer.learning_rate * tf.math.exp(-0.1)

In [39]:
%%px --block
def average_models(model, local_update, layer_shapes, layer_sizes):
    """Overwrite ``model``'s weights with the mean of its weights and ``local_update``.

    Args:
        model: model exposing ``get_weights`` / ``set_weights``.
        local_update: second set of weights, in the same per-layer layout as
            ``model.get_weights()``.
        layer_shapes: per-array shapes passed to ``unflatten_weights``.
        layer_sizes: per-array element counts passed to ``unflatten_weights``.
    """
    # flatten both weight sets so they can be averaged in a single call
    # (assumes flatten_weights returns one flat vector per weight set - TODO confirm)
    flat_pair = [flatten_weights(model.get_weights()), flatten_weights(local_update)]
    mean_flat = np.average(flat_pair, axis=0)

    # restore the per-layer structure and load it into the model
    model.set_weights(unflatten_weights(mean_flat, layer_shapes, layer_sizes))

In [40]:
%%px --block
def get_model_architecture(model):
    """Describe the layout of ``model``'s weight arrays.

    Args:
        model: object whose ``get_weights()`` returns a list of arrays.

    Returns:
        ``(layer_shapes, layer_sizes)``: two lists, in ``get_weights()`` order,
        holding each array's ``shape`` and its total element count (``size``).
    """
    weight_arrays = model.get_weights()
    layer_shapes = [array.shape for array in weight_arrays]
    layer_sizes = [array.size for array in weight_arrays]
    return layer_shapes, layer_sizes

In [44]:
%%px --block

def _min_max_scale(column):
    """Rescale one numeric column to the range [0, 1].

    A constant column (max == min) is mapped to all zeros; dividing by its
    zero range would otherwise fill the column with NaN.
    """
    low = column.min()
    span = column.max() - low
    return (column - low) / (span if span != 0 else 1)


def _shard_bounds(total, parts, index):
    """Return the ``[start, stop)`` row positions of shard ``index`` out of ``parts``.

    Shard sizes follow ``np.array_split``: the first ``total % parts`` shards
    each hold one extra row.
    """
    base, extra = divmod(total, parts)
    start = index * base + min(index, extra)
    stop = start + base + (1 if index < extra else 0)
    return start, stop


def data_pre_process(rank, size, train_pct, train_bs, test_bs, coordination_size, coord_bs):
    """Load, clean and split the Darknet CSV for one worker.

    The CSV is cleaned, the ``Label`` column is mapped to integer class ids
    (in order of first appearance), rows are shuffled, and every feature column
    is min-max scaled. The first ``coordination_size`` shuffled rows form the
    coordination set; the remaining rows are cut into ``size`` contiguous
    shards and this worker keeps shard ``rank``, which is then split into a
    training and a test part.

    The shuffle uses ``DataFrame.sample`` without a ``random_state``, i.e. it
    draws from NumPy's global RNG. Workers only agree on the coordination set
    and on the shard boundaries if they seeded that RNG identically beforehand.

    Args:
        rank: index of this worker, in ``[0, size)``.
        size: total number of workers.
        train_pct: fraction of the worker's shard used for training.
        train_bs: batch size of the training dataset.
        test_bs: batch size of the test dataset.
        coordination_size: number of rows placed in the coordination set.
        coord_bs: batch size of the coordination dataset.

    Returns:
        Tuple ``(train_set, test_set, coordination_set, full_train_data,
        full_train_label, num_inputs, num_outputs)``. The three datasets are
        batched ``tf.data.Dataset`` objects of ``(features, label)`` pairs;
        ``full_train_data`` / ``full_train_label`` are tensors over *all*
        cleaned rows (coordination, training and test rows of every worker);
        ``num_inputs`` is the number of feature columns and ``num_outputs``
        the number of distinct labels.
    """
    # read in CSV data (malformed lines are skipped)
    raw_df_data = pd.read_csv("Data/Darknet.CSV", parse_dates=["Timestamp"], on_bad_lines='skip')

    # make timestamp numeric (just time of day, in hours)
    timestamp = raw_df_data["Timestamp"]
    raw_df_data["Timestamp"] = timestamp.dt.hour + timestamp.dt.minute/60 + timestamp.dt.second/3600

    # remove rows containing NaN or +/-Inf
    raw_df = raw_df_data.replace([np.inf, -np.inf], np.nan).dropna()

    # clean up the sub-labels (inconsistent capitalisation in the source file)
    raw_df = raw_df.rename(columns={"Label.1": "Subtype"})
    raw_df["Subtype"] = raw_df["Subtype"].replace({'AUDIO-STREAMING': 'Audio-Streaming',
                                                   'Video-streaming': 'Video-Streaming'})

    # add one-hot encoding of the sub-labels; the dtype is pinned because newer
    # pandas defaults to bool columns, which the min-max subtraction below rejects
    onehot = pd.get_dummies(raw_df['Subtype'], dtype=np.uint8)
    raw_df = pd.concat([raw_df, onehot], axis=1, join='inner')

    # drop IP columns and 0 columns
    ip_cols = ['Flow ID', 'Src IP', 'Dst IP', 'Active Mean', 'Active Std', 'Active Max',
               'Active Min', 'Subflow Bwd Packets', 'Fwd Bytes/Bulk Avg', 'Fwd Packet/Bulk Avg',
               'Fwd Bulk Rate Avg', 'Bwd Bytes/Bulk Avg', 'URG Flag Count', 'CWE Flag Count',
               'ECE Flag Count', 'Bwd PSH Flags', 'Fwd URG Flags', 'Bwd URG Flags']
    raw_df = raw_df.drop(ip_cols, axis=1)

    # label dataframe: class id = position of the label in order of first appearance
    traffic_categories = raw_df['Label'].unique()
    tc = dict(zip(traffic_categories, range(len(traffic_categories))))
    raw_df['Label'] = raw_df['Label'].map(tc)

    # shuffle dataset (uses NumPy's global RNG, see docstring)
    raw_df = raw_df.sample(frac=1)

    # extract features
    non_normalized_df = raw_df.drop(['Label', 'Subtype'], axis=1)

    # extract labels
    labels = raw_df['Label']

    # normalize the feature dataframe column by column
    normalized_df = non_normalized_df.apply(_min_max_scale)

    # create coordination set; the index is shuffled, so select rows by position
    coord_x = tf.convert_to_tensor(normalized_df.iloc[:coordination_size, :])
    coord_y = tf.convert_to_tensor(labels.iloc[:coordination_size])
    coordination_set = tf.data.Dataset.from_tensor_slices((coord_x, coord_y)).batch(coord_bs)

    # get data info
    num_inputs = len(normalized_df.columns.to_list())
    num_outputs = len(traffic_categories)

    # Split the remaining rows amongst workers (same shard sizes as np.array_split)
    pool_x = normalized_df.iloc[coordination_size:, :]
    pool_y = labels.iloc[coordination_size:]
    shard_start, shard_stop = _shard_bounds(len(pool_y), size, rank)
    worker_data = pool_x.iloc[shard_start:shard_stop, :]
    worker_label = pool_y.iloc[shard_start:shard_stop]

    # create train/test split
    num_data = len(worker_label)
    num_train = int(num_data * train_pct)
    # train
    train_x = tf.convert_to_tensor(worker_data.iloc[:num_train, :])
    train_y = tf.convert_to_tensor(worker_label.iloc[:num_train])
    train_set = tf.data.Dataset.from_tensor_slices((train_x, train_y)).batch(train_bs)
    # test
    test_x = tf.convert_to_tensor(worker_data.iloc[num_train:, :])
    test_y = tf.convert_to_tensor(worker_label.iloc[num_train:])
    test_set = tf.data.Dataset.from_tensor_slices((test_x, test_y)).batch(test_bs)

    # full data set (every cleaned row, not only this worker's training rows)
    full_train_data = tf.convert_to_tensor(normalized_df)
    full_train_label = tf.convert_to_tensor(labels)

    return train_set, test_set, coordination_set, full_train_data, full_train_label, num_inputs, num_outputs

In [45]:
%%px --block
def train(model, communicator, rank, size, lossF, optimizer, train_dataset, coordination_dataset, epochs, 
          coord_batch_size, batches, num_outputs, layer_shapes, layer_sizes, l2):
    """Train ``model`` with local minibatch steps combined with consensus steps.

    Per epoch:
      1. ``set_learning_rate`` adjusts the optimizer's learning rate.
      2. The model's predictions on every coordination batch are flattened
         into one column of ``send_predicted`` and handed to
         ``communicator.average``, which returns the array used as the
         consensus target ``z``.
      3. For each training minibatch: one gradient step on ``lossF``; the
         resulting weights are kept as ``local_model``; the model is restored
         to ``start_model``; one pass of ``consensus_loss`` gradient steps is
         run over the coordination set; finally ``average_models`` sets the
         weights to the mean of the consensus-trained weights and
         ``local_model``.
      4. Accuracy and cross-entropy of the local steps are printed and reset.

    Args:
        model: Keras model to train (updated in place).
        communicator: object whose ``average(array)`` returns
            ``(averaged_array, comm_time)``.
        rank: worker index, used only in the progress message.
        size: number of workers (not used in this function).
        lossF: loss callable taking ``y_true`` and ``y_pred`` keyword arguments.
        optimizer: optimizer shared by the local and the consensus steps.
        train_dataset: iterable of ``(data, target)`` training batches.
        coordination_dataset: iterable of ``(data, target)`` coordination batches.
        epochs: number of epochs to run.
        coord_batch_size: number of rows in each coordination batch.
        batches: number of coordination batches.
        num_outputs: number of classes predicted by the model.
        layer_shapes: forwarded to ``average_models``.
        layer_sizes: forwarded to ``average_models``.
        l2: weight of the consensus term in ``consensus_loss``.

    Returns:
        None.
    """

    acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()
    loss_metric = tf.keras.metrics.SparseCategoricalCrossentropy()
        
    for epoch in range(epochs):
        
        # Adjust learning rate
        set_learning_rate(optimizer, epoch)

        # Forward Pass of Coordination Set (get z)
        # One column per coordination batch. Each column holds exactly
        # num_outputs*coord_batch_size values, so every coordination batch must be
        # full-sized (coordination set size divisible by the batch size).
        send_predicted = np.zeros((num_outputs*coord_batch_size, batches), dtype=np.float32)
        for c_batch_idx, (c_data, c_target) in enumerate(coordination_dataset):
            pred = model(c_data, training=True)
            send_predicted[:, c_batch_idx] = pred.numpy().flatten()

        # Communication Process Here
        # comm_time is returned by the communicator but not used below.
        recv_avg_pred, comm_time = communicator.average(send_predicted)

        # save initial model
        # NOTE(review): this snapshot is taken once per epoch, yet it is restored
        # before the consensus steps of *every* minibatch below, so those steps
        # always restart from the epoch-start weights. Confirm whether the
        # snapshot was meant to be refreshed at the top of each minibatch.
        start_model = copy.deepcopy(model.get_weights())

        # Local Training
        for batch_idx, (data, target) in enumerate(train_dataset):

            # Minibatch Update
            with tf.GradientTape() as tape:
                y_p = model(data, training=True)
                loss_val = lossF(y_true=target, y_pred=y_p)
            grads = tape.gradient(loss_val, model.trainable_weights)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            # metrics track the local (pre-averaging) predictions only
            acc_metric.update_state(target, y_p)
            loss_metric.update_state(target, y_p)

            # save model after local update
            local_model = copy.deepcopy(model.get_weights())

            # reset model weights
            model.set_weights(start_model)

            # Consensus Training
            for c_batch_idx, (c_data, c_target) in enumerate(coordination_dataset):
                with tf.GradientTape() as tape:
                    c_yp = model(c_data, training=True)
                    # column c_batch_idx is reshaped back to (rows, classes) for this batch
                    loss_val = consensus_loss(y_true=c_target, y_pred=c_yp,
                                               z=recv_avg_pred[:, c_batch_idx].reshape(coord_batch_size, num_outputs),
                                               l2=l2)
                grads = tape.gradient(loss_val, model.trainable_weights)
                optimizer.apply_gradients(zip(grads, model.trainable_variables))

            # update model weights
            # mean of the consensus-trained weights (currently in the model) and local_model
            average_models(model, local_model, layer_shapes, layer_sizes)
        
        print('(Rank %d) Epoch %d: Training Accuracy is %0.4f, Loss is %0.4f' % (rank, epoch, acc_metric.result(), loss_metric.result()))
        # start the next epoch's statistics from scratch
        loss_metric.reset_states()
        acc_metric.reset_states()

## Run Experiments

In [46]:
%%px --block
# Per-worker train/test split and batch sizes
train_pct = 0.8
train_bs = 128
test_bs = 128

# Coordination set: a single batch covers the whole set. train() writes one
# full-sized column per coordination batch, so keep coordination_size a
# multiple of coord_bs.
coordination_size = 32**2
coord_bs = 32**2
coord_num_batches = int(np.ceil(coordination_size/coord_bs))

# Build this rank's datasets plus the full data used for evaluation
(train_set, test_set, coord_set,
 full_train_x, full_train_y,
 num_inputs, num_outputs) = data_pre_process(
    rank, size, train_pct, train_bs, test_bs, coordination_size, coord_bs)

%px:   0%|          | 0/4 [00:00<?, ?tasks/s]

[stderr:3] 2022-11-12 18:55:47.101725: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


[stderr:0] 2022-11-12 18:55:47.572574: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


[stderr:2] 2022-11-12 18:55:47.585755: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


[stderr:1] 2022-11-12 18:55:47.774726: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [47]:
%%px --block

# initialize graph
# Communication topology requested from the project's Graph class.
# NOTE(review): the meaning of 'ring' / 'uniform-symmetric' / num_c is defined in
# the external `network` module - confirm there.
graph_type = 'ring'
G = Graph(rank, size, mpi, graph_type, weight_type='uniform-symmetric', num_c=None)

# initialize communicator
# train() calls communicator.average(...) on the coordination-set predictions.
communicator = DecentralizedNoModelSGD(rank, size, mpi, G)

In [48]:
%%px --block
# multi classification model
def _build_classifier(hidden_widths, num_inputs, num_outputs):
    """Build a ReLU multilayer perceptron with a softmax output layer.

    Args:
        hidden_widths: number of units of each hidden Dense layer, in order.
        num_inputs: number of input features.
        num_outputs: number of classes.
    """
    model = tf.keras.Sequential()
    for position, width in enumerate(hidden_widths):
        # only the first layer declares the input shape
        first_layer_args = {'input_shape': (num_inputs,)} if position == 0 else {}
        model.add(tf.keras.layers.Dense(width, activation='relu', **first_layer_args))
    model.add(tf.keras.layers.Dense(num_outputs, activation='softmax'))
    return model


# multi classification model
# Ranks 0 and 1 use five hidden layers; every other rank uses the same stack
# preceded by an extra 64-unit layer, so the workers' architectures differ.
if rank in (0, 1):
    multi_model = _build_classifier([128, 256, 128, 64, 10], num_inputs, num_outputs)
else:
    multi_model = _build_classifier([64, 128, 256, 128, 64, 10], num_inputs, num_outputs)

# Initialize Local Loss Function
lossF = tf.keras.losses.SparseCategoricalCrossentropy()

# model architecture (shape and element count of every weight array)
layer_shapes, layer_sizes = get_model_architecture(multi_model)

# l2 penalty: weight of the consensus term in consensus_loss
l2 = 0.1

# epochs
epochs = 10

# Initialize Optimizer
learning_rate = 0.01
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

In [49]:
%%px --block
# Run training on every engine; each rank prints its own per-epoch accuracy and loss.
# coord_bs and coord_num_batches fill train()'s coord_batch_size and batches parameters.
train(multi_model, communicator, rank, size, lossF, optimizer, train_set, 
      coord_set, epochs, coord_bs, coord_num_batches, num_outputs, layer_shapes, layer_sizes, l2)

%px:   0%|          | 0/4 [00:00<?, ?tasks/s]

[stdout:1] (Rank 1) Epoch 0: Training Accuracy is 0.7287, Loss is 0.6937
(Rank 1) Epoch 1: Training Accuracy is 0.8534, Loss is 0.4264
(Rank 1) Epoch 2: Training Accuracy is 0.8697, Loss is 0.3554
(Rank 1) Epoch 3: Training Accuracy is 0.8778, Loss is 0.3151
(Rank 1) Epoch 4: Training Accuracy is 0.8814, Loss is 0.3087
(Rank 1) Epoch 5: Training Accuracy is 0.8922, Loss is 0.2657
(Rank 1) Epoch 6: Training Accuracy is 0.8817, Loss is 0.2958
(Rank 1) Epoch 7: Training Accuracy is 0.8994, Loss is 0.2515
(Rank 1) Epoch 8: Training Accuracy is 0.8992, Loss is 0.2619
(Rank 1) Epoch 9: Training Accuracy is 0.9079, Loss is 0.2346


[stdout:0] (Rank 0) Epoch 0: Training Accuracy is 0.7823, Loss is 1.0092
(Rank 0) Epoch 1: Training Accuracy is 0.8442, Loss is 0.4970
(Rank 0) Epoch 2: Training Accuracy is 0.8663, Loss is 0.3685
(Rank 0) Epoch 3: Training Accuracy is 0.8089, Loss is 0.5461
(Rank 0) Epoch 4: Training Accuracy is 0.8711, Loss is 0.3879
(Rank 0) Epoch 5: Training Accuracy is 0.8752, Loss is 0.3104
(Rank 0) Epoch 6: Training Accuracy is 0.8886, Loss is 0.2886
(Rank 0) Epoch 7: Training Accuracy is 0.8935, Loss is 0.2710
(Rank 0) Epoch 8: Training Accuracy is 0.8982, Loss is 0.2579
(Rank 0) Epoch 9: Training Accuracy is 0.9039, Loss is 0.2460


[stdout:2] (Rank 2) Epoch 0: Training Accuracy is 0.7376, Loss is 0.6879
(Rank 2) Epoch 1: Training Accuracy is 0.8128, Loss is 0.4578
(Rank 2) Epoch 2: Training Accuracy is 0.8632, Loss is 0.3442
(Rank 2) Epoch 3: Training Accuracy is 0.8718, Loss is 0.3203
(Rank 2) Epoch 4: Training Accuracy is 0.8916, Loss is 0.2687
(Rank 2) Epoch 5: Training Accuracy is 0.8797, Loss is 0.2950
(Rank 2) Epoch 6: Training Accuracy is 0.9037, Loss is 0.2497
(Rank 2) Epoch 7: Training Accuracy is 0.9000, Loss is 0.2532
(Rank 2) Epoch 8: Training Accuracy is 0.9136, Loss is 0.2234
(Rank 2) Epoch 9: Training Accuracy is 0.9141, Loss is 0.2217


[stdout:3] (Rank 3) Epoch 0: Training Accuracy is 0.7401, Loss is 0.7400
(Rank 3) Epoch 1: Training Accuracy is 0.8467, Loss is 0.4562
(Rank 3) Epoch 2: Training Accuracy is 0.8716, Loss is 0.3425
(Rank 3) Epoch 3: Training Accuracy is 0.8739, Loss is 0.3093
(Rank 3) Epoch 4: Training Accuracy is 0.8794, Loss is 0.2895
(Rank 3) Epoch 5: Training Accuracy is 0.8829, Loss is 0.2897
(Rank 3) Epoch 6: Training Accuracy is 0.8865, Loss is 0.2732
(Rank 3) Epoch 7: Training Accuracy is 0.8976, Loss is 0.2572
(Rank 3) Epoch 8: Training Accuracy is 0.9051, Loss is 0.2448
(Rank 3) Epoch 9: Training Accuracy is 0.9103, Loss is 0.2331


In [None]:
%%px --block
# Class probabilities of this rank's model on full_train_x, which data_pre_process
# builds from every cleaned row (not only this worker's training shard).
predictions = multi_model.predict(full_train_x)

In [None]:
%%px --block
# Predicted class = index of the largest probability in each row.
pred = tf.math.argmax(predictions, axis=1)
# Rows are true labels, columns are predicted labels.
train_confusion_mtx = tf.math.confusion_matrix(full_train_y, pred)
# normalize confusion matrix
# The divisor is the per-column total (sum over axis 0), so every column sums to 1:
# entry [i, j] is the share of samples predicted as class j whose true class is i.
train_confusion_mtx = train_confusion_mtx / tf.reduce_sum(train_confusion_mtx, 0).numpy()
# A class that is never predicted has a zero column total (0/0 -> NaN); show it as 0.
train_confusion_mtx = tf.where(tf.math.is_nan(train_confusion_mtx), tf.zeros_like(train_confusion_mtx), train_confusion_mtx)

In [None]:
%%px --block
# Tick labels for the class indices 0..3.
# NOTE(review): data_pre_process numbers the classes in order of first appearance
# in the CSV; confirm that order matches this hard-coded list.
attack_labels = ['Non-Tor', 'NonVPN', 'Tor', 'VPN']
plt.figure(figsize=(8, 6))
# Heat-map of the normalised confusion matrix with the cell values annotated.
sns.heatmap(train_confusion_mtx,
            xticklabels=attack_labels,
            yticklabels=attack_labels,
            annot=True, fmt='g')
plt.xlabel('Prediction')
plt.ylabel('True Label')
# Workers are numbered from 1 in the title (rank + 1).
plt.title('Confusion Matrix for Worker %d on CIC-Darknet2020 Data' % (rank+1))
plt.show()