In [None]:
# Mount Google Drive at /content/drive; every path used below lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Experiment working directory on the mounted Drive (the next cell changes into it).
main_dir = '/content/drive/MyDrive/Colab Notebooks/NTC'
# Shared helper modules; in this notebook they are only referenced by the
# commented-out `!cp` lines further down.
checkpoint_py = '/content/drive/MyDrive/Colab Notebooks/FL/checkpoint_manager.py'
utils_py = '/content/drive/MyDrive/Colab Notebooks/FL/utils.py'

In [None]:
# Change into the experiment folder: later cells import local modules (utils,
# checkpoint_manager, models) and build output paths from Path.cwd().
%cd $main_dir
# List the folder content as a quick sanity check.
!ls

/content/drive/MyDrive/Colab Notebooks/NTC
Accuracy.png		Flower_FL  __pycache__	     utils.py
Accuracy_Zoom.png	Loss.png   Result.ipynb
checkpoint_manager.py	models	   sdn_results
Federated_Learning_NTC	NTC.ipynb  sdn_saved_models


In [None]:
# One-time setup, kept disabled: copy the shared helper modules into the working directory.
# NOTE(review): `models_py` is not defined anywhere in the visible notebook; define it
# before re-enabling the last line.
# !cp "$checkpoint_py" .
# !cp "$utils_py" .
# !cp "$models_py" .

In [None]:
from time import time, localtime
import numpy as np
from utils import plot_graph
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from keras import losses, metrics, optimizers
import random
import pandas as pd
import sys
import matplotlib.pyplot as plt
import nest_asyncio
from pathlib import Path
from checkpoint_manager import FileCheckpointManager
from keras.models import load_model
import pickle
# Allow nested asyncio event loops inside the notebook kernel.
nest_asyncio.apply()

# Seed every random number generator the notebook relies on, not only TensorFlow:
# pandas' `.sample()` (used by train_test_split) draws from NumPy's global RNG when no
# random_state is given, so without np.random.seed the train/validation split changes
# on every run.
SEED = 1337
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
def train_test_split(df, frac=0.2, random_state=None):
    """Split a packet table into train and test parts without splitting any flow.

    A fraction ``frac`` of the distinct ``flow_id`` values is drawn at random; every
    row belonging to one of the drawn flows goes to the test part, all remaining rows
    go to the train part. The row order of ``df`` is preserved in both parts.

    Args:
        df: DataFrame with a ``flow_id`` column.
        frac: Fraction of the distinct flows to place in the test part.
        random_state: Optional seed (or any value accepted by ``Series.sample``)
            making the draw reproducible. ``None`` keeps the previous behaviour of
            using NumPy's global random state.

    Returns:
        Tuple ``(train, test)`` of DataFrames.
    """
    flow_ids = df['flow_id'].drop_duplicates()
    selected = flow_ids.sample(frac=frac, random_state=random_state)
    # Compute the membership mask once and reuse it for both halves.
    in_test = df['flow_id'].isin(selected)
    return df[~in_test], df[in_test]

In [None]:
from models.CNN_2D import create_keras_model

Thay đổi các siêu tham số trong cell dưới <br>
**experiment_name:** Tên bộ dữ liệu được sử dụng, <Tên dữ liệu>_<Số byte sử dụng> <br>
**method:** Mô hình sử dụng <br>
**client_lr, server_lr:** Learning rate của client và server, khi fine tune bắt đầu từ 1 và giảm dần xuống 3e-4 <br>
**NUM_ROUNDS:** Số vòng lặp (Bắt đầu từ 1 và tăng dần lên 3000 nếu mô hình chưa hội tụ - 1/100/200/300/400/500/.../3000) <br>
**BATCH_SIZE:** Kích thước batch (Bắt đầu từ 8 và tăng dần lên 64) 8/16/32/64 <br>
**split:** Để nguyên là 5
<br>
**byte_number:** (string): Các byte của packet, bao gồm 10, 32, 64, 128, 256, 512, 1024, 1460

In [None]:
# --- Experiment configuration -------------------------------------------------
# Number of packet bytes used per sample; also selects the dataset file to load.
byte_number = "128"
# Dataset tag, used as a sub-folder name for saved models and results.
experiment_name = f"GQUIC_small_{byte_number}"
# Model tag, used as the innermost sub-folder name.
method = "nonFL_VGG19"

# --- Optimisation settings ----------------------------------------------------
client_lr = 3e-4   # learning rate passed to the Adam optimizer
NUM_ROUNDS = 200   # passed to model.fit as the number of epochs
BATCH_SIZE = 32    # batch size for fit / evaluate / predict

In [None]:
# Per-experiment folders for saved models and for result files, both rooted at the
# current working directory.
this_dir = Path.cwd()
model_dir = this_dir / "sdn_saved_models" / experiment_name / method
output_dir = this_dir / "sdn_results" / experiment_name / method

# exist_ok=True makes the cell safely re-runnable and avoids the check-then-create race
# of a separate `exists()` test.
model_dir.mkdir(parents=True, exist_ok=True)
output_dir.mkdir(parents=True, exist_ok=True)

In [None]:
def sec_to_hours(seconds):
    """Format a duration given in seconds as ``"H hours M mins S seconds"``.

    The duration is rounded to the nearest whole second *before* it is split into
    hours, minutes and seconds. Rounding each part separately (as the previous
    implementation did through ``{:.0f}``) could print an invalid value such as
    ``"0 hours 59 mins 60 seconds"`` for 3599.7 seconds.

    Args:
        seconds: Non-negative duration in seconds (int or float).

    Returns:
        The formatted duration string.
    """
    total_seconds = int(round(seconds))
    hours, remainder = divmod(total_seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    return "{} hours {} mins {} seconds".format(hours, minutes, secs)

In [None]:
def most_frequent(List):
    """Return the element that occurs most often in ``List``.

    Counting is done in a single pass. When several elements share the highest
    count, the one encountered first in ``List`` is returned, so the result is
    deterministic (the previous ``max(set(...))`` form depended on set ordering).

    Args:
        List: Non-empty iterable of hashable elements.

    Returns:
        The most frequent element.

    Raises:
        ValueError: If ``List`` is empty.
    """
    # Local import keeps this cell self-contained.
    from collections import Counter

    counts = Counter(List)
    if not counts:
        raise ValueError("most_frequent() requires a non-empty sequence")
    return counts.most_common(1)[0][0]

### **Đọc dữ liệu**

In [None]:
# Feather files with the packet tables; the file name encodes the number of bytes per packet.
train_dir = f'/content/drive/MyDrive/Colab Notebooks/FL/GQUIC_small/Train/GQUIC_train_{byte_number}.feather'
test_dir = f'/content/drive/MyDrive/Colab Notebooks/FL/GQUIC_small/Test/GQUIC_test_{byte_number}.feather'

# Load the test table first, then the training table (independent reads).
test = pd.read_feather(test_dir)
data = pd.read_feather(train_dir)

In [None]:
# Rows per class in the training table divided by 20 (the notebook treats one flow as 20 packets).
data['Label'].value_counts().div(20)

4    3742.0
0    2842.0
2    2808.0
1    2034.0
3    1740.0
Name: Label, dtype: float64

In [None]:
# Rows per class in the test table divided by 20 (the notebook treats one flow as 20 packets).
test['Label'].value_counts().div(20)

4    903.0
0    727.0
2    682.0
1    522.0
3    458.0
Name: Label, dtype: float64

In [None]:
# Hold out 20% of the training flows as a validation set (the split is done per flow_id).
# NOTE: `data` is overwritten with the reduced training part, so re-running this cell on
# its own carves another 20% out of the already-split data; re-run the loading cell first.
data, val = train_test_split(data,frac=0.2)

In [None]:
# Collapse the per-packet labels of every test flow into one label per flow.
# sort=False keeps the flows in the order they appear in `test`, which is the order in
# which the feature rows are reshaped into flows later on. The default sort=True orders
# the groups by flow_id instead and can misalign labels and features when the table is
# not already sorted by flow_id.
result = test.groupby('flow_id', sort=False)['Label'].apply(list).to_dict()
flow_label = [most_frequent(labels) for labels in result.values()]

In [None]:
# Collapse the per-packet labels of every training flow into one label per flow.
# sort=False keeps the flows in the order they appear in `data`, which is the order in
# which the feature rows are reshaped into flows later on. The default sort=True orders
# the groups by flow_id instead and can misalign labels and features when the table is
# not already sorted by flow_id.
result = data.groupby('flow_id', sort=False)['Label'].apply(list).to_dict()
train_label = [most_frequent(labels) for labels in result.values()]

In [None]:
# Collapse the per-packet labels of every validation flow into one label per flow.
# sort=False keeps the flows in the order they appear in `val`, which is the order in
# which the feature rows are reshaped into flows later on. The default sort=True orders
# the groups by flow_id instead and can misalign labels and features when the table is
# not already sorted by flow_id.
result = val.groupby('flow_id', sort=False)['Label'].apply(list).to_dict()
val_label = [most_frequent(labels) for labels in result.values()]

In [None]:
# Convert the three per-flow label lists into NumPy arrays in one step.
flow_label, train_label, val_label = (
    np.array(labels) for labels in (flow_label, train_label, val_label)
)

In [None]:
# Remove the grouping key; what remains are the byte columns plus the Label column.
true_data = data.drop(columns='flow_id')
true_val = val.drop(columns='flow_id')
true_test = test.drop(columns='flow_id')

In [None]:
# Every column except Label is a feature; the class count is taken from the test labels.
NUM_FEATURE = true_test.shape[1] - 1
NUM_CLASSES = np.unique(true_test['Label']).size
print(NUM_FEATURE, NUM_CLASSES)

128 5


In [None]:
# Feature matrices: drop the Label column and divide by 255.
x_train = true_data.drop(columns=['Label']).to_numpy() / 255
x_val = true_val.drop(columns=['Label']).to_numpy() / 255
x_test = true_test.drop(columns='Label').to_numpy() / 255

In [None]:
# Group consecutive rows into flows of 20 packets each: (rows, features) -> (flows, 20, features).
x_train, x_test, x_val = (
    packets.reshape(-1, 20, NUM_FEATURE) for packets in (x_train, x_test, x_val)
)

In [None]:
# Disabled alternative: flatten each flow into one vector of 20*NUM_FEATURE values
# instead of keeping the 2-D (20, NUM_FEATURE) layout used by the rest of the notebook.
# x_train = x_train.reshape(-1, 20*NUM_FEATURE)
# x_test = x_test.reshape(-1, 20*NUM_FEATURE)
# x_val = x_val.reshape(-1, 20*NUM_FEATURE)

In [None]:
# Sanity check: one line per split, in the order train / test / validation.
print(x_train.shape, x_test.shape, x_val.shape, sep='\n')

(10533, 20, 128)
(3292, 20, 128)
(2633, 20, 128)


In [None]:
import cv2

def _data_processing(x, size=(32, 32)):
    """Resize every 2-D sample of ``x`` to a fixed size with bilinear interpolation.

    Args:
        x: Array whose first axis indexes the samples; each ``x[i]`` is passed to
            ``cv2.resize`` as a 2-D image.
        size: Target size as ``(width, height)``, the order ``cv2.resize`` expects.
            Defaults to ``(32, 32)``, the value that was previously hard-coded.

    Returns:
        Array of shape ``(len(x), height, width)`` holding the resized samples.
    """
    width, height = size

    # Without this guard an empty input would produce a shapeless 1-D array.
    if x.shape[0] == 0:
        return np.empty((0, height, width))

    return np.array([
        cv2.resize(x[i], (width, height), interpolation=cv2.INTER_LINEAR)
        for i in range(x.shape[0])
    ])

In [None]:
# Resize every split to the fixed image size expected by the network.
x_train, x_val, x_test = map(_data_processing, (x_train, x_val, x_test))

In [None]:
# Append a trailing channel axis: (N, H, W) -> (N, H, W, 1).
x_train = x_train[..., np.newaxis]
x_test = x_test[..., np.newaxis]
x_val = x_val[..., np.newaxis]

In [None]:
# Sanity check after resizing: one line per split, in the order train / test / validation.
print(x_train.shape, x_test.shape, x_val.shape, sep='\n')

(10533, 32, 32, 1)
(3292, 32, 32, 1)
(2633, 32, 32, 1)


### **Model Definition**

In [None]:
# Build the project's own 2-D CNN (imported from models.CNN_2D) and print its layer summary.
# NOTE(review): `model` is reassigned by the VGG19 cell that follows, so this instance is
# never trained — confirm whether this cell is still needed.
model = create_keras_model(x_train, NUM_CLASSES)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 20, 128, 32)       832       
                                                                 
 conv2d_1 (Conv2D)           (None, 20, 128, 32)       25632     
                                                                 
 max_pooling2d (MaxPooling2D  (None, 10, 64, 32)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 10, 64, 32)        0         
                                                                 
 conv2d_2 (Conv2D)           (None, 10, 64, 64)        18496     
                                                                 
 conv2d_3 (Conv2D)           (None, 10, 64, 64)        36928     
                                                        

In [None]:
from keras.layers import Input, Concatenate, Dense, Flatten, Dense
from keras.models import Sequential
# from keras.applications.vgg16 import VGG16
# from keras.applications import EfficientNetB0
# from keras.applications import InceptionV3
from keras.applications import VGG19
# from keras.applications import ResNet50

# Single-channel 32x32 input, repeated three times along the channel axis so that the
# tensor handed to VGG19 has three channels.
img_input = Input(shape=(32, 32, 1))
img_conc = Concatenate()([img_input] * 3)

# Randomly initialised VGG19 convolutional base (no pretrained weights, no classifier head).
conv_base = VGG19(weights=None, include_top=False, input_tensor=img_conc)

# Classifier head: flatten -> 256-unit ReLU layer -> softmax over the traffic classes.
model = Sequential([
    conv_base,
    Flatten(),
    Dense(256, activation='relu'),
    Dense(NUM_CLASSES, activation='softmax'),
])

model.summary()

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg19 (Functional)          (None, 1, 1, 512)         20024384  
                                                                 
 flatten_7 (Flatten)         (None, 512)               0         
                                                                 
 dense_13 (Dense)            (None, 256)               131328    
                                                                 
 dense_14 (Dense)            (None, 5)                 1285      
                                                                 
Total params: 20,156,997
Trainable params: 20,156,997
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Destination of the trained model inside the per-experiment model folder.
model_file = f"{model_dir}/model.h5"

### **Training**

In [None]:
# Configure training: Adam with the learning rate from the configuration cell and a
# sparse (integer-label) categorical cross-entropy loss.
# The metric below is what later cells read from `history.history` under the keys
# 'sparse_categorical_accuracy' and 'val_sparse_categorical_accuracy'.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=client_lr),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()]
)

In [None]:
# Train on the in-memory arrays and measure the wall-clock duration in seconds.
# `use_multiprocessing` was dropped: Keras documents it as applying only to generator /
# Sequence inputs, so it had no effect on NumPy arrays, and newer Keras releases no
# longer accept the argument.
start = time()
history = model.fit(
    x_train,
    train_label,
    epochs=NUM_ROUNDS,
    batch_size=BATCH_SIZE,
    shuffle=True,
    validation_data=(x_val, val_label),
)
end = time()
duration = end - start

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [None]:
# Persist the trained model to `model_file` (a model.h5 path under model_dir).
model.save(model_file)

In [None]:
# Save the per-epoch training history (losses and metrics) next to the saved model.
# `history_file` was never defined in the notebook, which made this cell fail with a
# NameError on a fresh kernel; it is defined here.
history_file = model_dir / 'history.pkl'
with open(history_file, 'wb') as f:
    pickle.dump(history.history, f)

In [None]:
# Human-readable training duration; also written to time.txt further down.
total_time = f"Time: {sec_to_hours(duration)}"
print(total_time)

In [None]:
# Report how many GPUs TensorFlow can see in this runtime.
print(f"Num GPUs Available:  {len(tf.config.list_physical_devices('GPU'))}")

### **Evaluation**

In [None]:
# Training-set accuracy at the epoch whose *training* loss is lowest.
# NOTE(review): this is not a validation-based selection and no weights from that epoch
# are restored; the model evaluated below is the one left after the final epoch.
best_model_accuracy = history.history['sparse_categorical_accuracy'][np.argmin(history.history['loss'])]

In [None]:
# Evaluate on the held-out test flows; the result is [loss, sparse categorical accuracy].
# `use_multiprocessing` was dropped: it only applies to generator / Sequence inputs and is
# not accepted by newer Keras releases.
test_acc = model.evaluate(x_test, flow_label, verbose=2, batch_size=BATCH_SIZE)

In [None]:
# "<training accuracy %>_<test accuracy %>" summary string.
train_val = f"{best_model_accuracy*100}_{test_acc[1]*100}"

In [None]:
# Show the accuracy summary string built in the previous cell.
print(train_val)

In [None]:
# Record the hyperparameters of this run next to its results.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open(output_dir/'parameters.txt', 'w') as f:
    print('client_lr: {}\nEpochs: {}\nBATCH_SIZE: {}'.format(
        client_lr, NUM_ROUNDS, BATCH_SIZE), file=f)

In [None]:
# Record the training duration next to the results.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open(output_dir/'time.txt', 'w') as f:
    print(total_time, file=f)

In [None]:
# Class probabilities for every test flow.
# `use_multiprocessing` was dropped: it only applies to generator / Sequence inputs and is
# not accepted by newer Keras releases.
predictions = model.predict(x_test, verbose=2, batch_size=BATCH_SIZE)

In [None]:
# Predicted class per flow: index of the highest probability along the last axis.
flow_pred = predictions.argmax(axis=-1)

In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report

# Display names for the classification report: "Class 0" ... "Class <NUM_CLASSES-1>".
classes = ["Class {}".format(c) for c in range(NUM_CLASSES)]
print(classes)

# Write the confusion matrix, accuracy, averaged precision/recall/F1 and the full
# classification report to metrics.txt. The `with` block closes the file on exit.
with open(output_dir/'metrics.txt', 'w') as f:
    confusion = confusion_matrix(flow_label, flow_pred)
    print('Confusion Matrix\n', file=f)
    print(confusion, file=f)

    print('\nAccuracy: {:.2f}\n'.format(
        accuracy_score(flow_label, flow_pred)), file=f)

    # Same three metrics for each averaging mode, in the original order; the heading is
    # the capitalised mode name ("Micro", "Macro", "Weighted").
    for average in ('micro', 'macro', 'weighted'):
        title = average.capitalize()
        print('{} Precision: {:.2f}\n'.format(
            title, precision_score(flow_label, flow_pred, average=average)), file=f)
        print('{} Recall: {:.2f}\n'.format(
            title, recall_score(flow_label, flow_pred, average=average)), file=f)
        print('{} F1-score: {:.2f}\n'.format(
            title, f1_score(flow_label, flow_pred, average=average)), file=f)

    print('\nClassification Report\n', file=f)
    print(classification_report(flow_label, flow_pred, target_names=classes), file=f)

In [None]:
# 1-based epoch numbers for the x axis (also used by the loss plot in the next cell).
epochs = range(1, len(history.history['sparse_categorical_accuracy']) + 1)

# Training vs. validation accuracy per epoch, drawn on an explicit Axes object.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(epochs, history.history['sparse_categorical_accuracy'], label='accuracy')
ax.plot(epochs, history.history['val_sparse_categorical_accuracy'], label='val_accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_ylim([0, 1])
ax.set_title('Training and validation accuracy')
ax.legend()
fig.savefig(output_dir / "normal_model_Accuracy.png")

In [None]:
# Training vs. validation loss per epoch, drawn on an explicit Axes object.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(epochs, history.history['loss'], label='loss')
ax.plot(epochs, history.history['val_loss'], label='val_loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Training and validation loss')
ax.legend()
fig.savefig(output_dir / "normal_model_Loss.png")