In [1]:
import os
import math
import wandb
from sklearn.model_selection import train_test_split
import random
import numpy as np
from tensorflow.keras import datasets, layers, models
import tensorflow as tf
import pandas as pd
from collections import OrderedDict
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

def gen_ue_cluster(UE_sort):
    """Cluster UEs by the Euclidean distance between neighbours in sorted order.

    Each UE is placed at the 2-D point ``(rank, score)``, where ``rank`` is its
    1-based position in ``UE_sort`` and ``score`` is its mapped value. Walking
    the UEs in order, a new cluster starts whenever the distance to the
    previous UE is strictly greater than the threshold.

    Args:
        UE_sort: Ordered mapping of UE id -> numeric score. The caller in this
            file passes an ``OrderedDict`` sorted by score.

    Returns:
        A list of clusters, each a list of UE ids in input order. An empty
        mapping yields ``[]`` and a single UE yields ``[[ue_id]]``.
    """
    ue_ids = list(UE_sort.keys())

    # Edge cases: the original divided by zero on empty input and silently
    # dropped the only UE when exactly one was given.
    if not ue_ids:
        return []
    if len(ue_ids) == 1:
        return [[ue_ids[0]]]

    # Distance between each pair of consecutive points (rank, score).
    gaps = []
    gap_total = 0
    for pos in range(len(ue_ids) - 1):
        x1, y1 = pos + 1, UE_sort[ue_ids[pos]]
        x2, y2 = pos + 2, UE_sort[ue_ids[pos + 1]]
        gap = (((x2 - x1) ** 2) + ((y2 - y1) ** 2)) ** 0.5
        gaps.append(gap)
        gap_total += gap  # plain running sum keeps the original float rounding

    # NOTE(review): the total is divided by the number of UEs, not by the
    # number of gaps (len - 1). Kept as-is so existing clustering results do
    # not change -- confirm whether this is intended.
    threshold = gap_total / len(ue_ids)

    clusters = [[ue_ids[0]]]
    for ue_id, gap in zip(ue_ids[1:], gaps):
        if gap > threshold:
            clusters.append([ue_id])  # large jump: open a new cluster
        else:
            clusters[-1].append(ue_id)

    return clusters
    
def gap(weights):
    """Global average pooling over the leading axes of a weight array.

    Args:
        weights: Array exposing ``.shape``; must be 2-D or 4-D.
            Presumably a Keras Dense kernel (2-D) or Conv2D kernel (4-D) --
            TODO confirm against the caller.

    Returns:
        For a 4-D input, the mean over axis 1 and then axis 0, leaving an
        array shaped like the last two dimensions. For a 2-D input, the mean
        over axis 0, leaving a 1-D array.

    Raises:
        ValueError: If ``weights`` is neither 2-D nor 4-D.
    """
    rank = len(weights.shape)
    if rank == 4:
        # Two-step mean (axis 1, then axis 0) kept to preserve float rounding.
        row = np.mean(weights, axis=1)
        return np.mean(row, axis=0)
    if rank == 2:
        return np.mean(weights, axis=0)

    # The original `raise print(...)` raised an unrelated TypeError because
    # print() returns None; raise a real exception carrying the message.
    raise ValueError(f'가중치 잘못됨 (shape={weights.shape})')


def split_ue_group(UE_weights, UE_NUM):
    """Score each UE by how many layers sit above the cross-UE mean, then cluster.

    For every layer, each UE's first weight array is pooled with ``gap`` and
    compared element-wise with the mean of the pooled arrays over all
    ``UE_NUM`` slots. A UE gets one vote for a layer when at least as many
    elements are above the mean as are not. UEs are then ordered by vote
    count and passed to ``gen_ue_cluster``.

    Args:
        UE_weights: Sequence of dicts, one per UE, mapping layer name to a
            sequence whose element 0 is that layer's weight array. The layer
            names are taken from the first UE.
        UE_NUM: Number of UE slots to allocate and score.

    Returns:
        Whatever ``gen_ue_cluster`` returns for the vote-sorted UEs.
    """
    votes = [0] * UE_NUM

    for name in UE_weights[0].keys():
        # One pooled slot per UE, shaped like the last two weight dimensions.
        tail = UE_weights[0][name][0].shape[-2:]
        pooled = np.zeros((UE_NUM, tail[0], tail[1]))
        for slot, ue in enumerate(UE_weights):
            pooled[slot] = gap(ue[name][0])

        layer_mean = np.mean(pooled, axis=0)

        for slot in range(UE_NUM):
            above = pooled[slot] > layer_mean
            n_above = np.count_nonzero(above)
            # Majority vote; ties count as "above".
            if n_above >= above.size - n_above:
                votes[slot] += 1

    # The stable sort keeps UE index order among equal vote counts.
    ranked = OrderedDict(sorted(enumerate(votes), key=lambda item: item[1]))

    return gen_ue_cluster(ranked)


def gen_UE_model():
    """Build and compile the model trained locally on each UE.

    The architecture comes from ``gen_server_model`` so that the UE and server
    models cannot drift apart; the training loop copies averaged weights
    between them by layer name. Layers are created in the same order as
    before.

    Returns:
        A compiled ``Sequential`` model using the 'SGD' optimizer,
        sparse categorical cross-entropy loss and an accuracy metric.
    """
    model = gen_server_model()
    model.compile(optimizer='SGD',
                  loss='sparse_categorical_crossentropy', metrics=['accuracy'])  # lr = 0.01 (per the original note)

    return model


def gen_server_model():
    """Build the uncompiled server-side model.

    The stack is two Conv2D + AveragePooling2D stages, a Flatten, two tanh
    Dense layers and a 10-way softmax output, taking 32x32x1 inputs.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    return models.Sequential([
        layers.Conv2D(filters=6, kernel_size=(5, 5), strides=1,
                      activation='tanh', input_shape=(32, 32, 1)),
        layers.AveragePooling2D(pool_size=2, strides=2),
        layers.Conv2D(filters=16, kernel_size=(5, 5), strides=1,
                      activation='tanh'),
        layers.AveragePooling2D(pool_size=2, strides=2),
        layers.Flatten(),
        layers.Dense(120, activation='tanh'),
        layers.Dense(84, activation='tanh'),
        layers.Dense(10, activation='softmax'),
    ])

In [2]:
# import os
# import math
# import wandb
# from sklearn.model_selection import train_test_split
# import random
# import numpy as np
# from tensorflow.keras import datasets, layers, models
# import tensorflow as tf
# import pandas as pd
# from collections import OrderedDict
# tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# def gen_ue_cluster(UE_sort):# UE를 유클리드 distance 기준으로 클러스터링한 리스트를 반환하는 함수
#     UE_cluster = []
#     UE_group = []
#     UE_distance = [] # 2 UE 간의 거리 리스트
#     UE_len = len(UE_sort)
#     avg_distance = 0
#     UE_sort_keys = list(UE_sort.keys())
#     for i in range(0, UE_len - 1):
#         x1, y1 = i + 1, UE_sort[UE_sort_keys[i]]
#         x2, y2 = i+2, UE_sort[UE_sort_keys[i+1]]
#         distance = (((x2 - x1) ** 2) + ((y2 - y1) ** 2)) ** 0.5
#         avg_distance += distance
#         UE_distance.append((UE_sort_keys[i], UE_sort_keys[i+1],distance))
#     avg_distance = avg_distance/len(UE_sort)
    
#     for i, x in enumerate(UE_distance):
#         if UE_distance[-1][0] == x[0]:
#             UE_group.append(x[0])
#             if x[2] > avg_distance:
#                 UE_cluster.append(UE_group.copy())
#                 UE_group.clear()
#                 UE_group.append(x[1])
#                 UE_cluster.append(UE_group.copy())
#             else:
#                 UE_group.append(x[1])
#                 UE_cluster.append(UE_group.copy())
#         else:
#             if x[2] > avg_distance:
#                 UE_group.append(x[0])
#                 UE_cluster.append(UE_group.copy())
#                 UE_group.clear()
#             else:
#                 UE_group.append(x[0])
            
    
#     return UE_cluster
    
# def gap(weights): # Global average pooling
#     if len(weights.shape) == 4:
#         row = np.mean(weights, axis=1)
#         result = np.mean(row, axis=0)
#     elif len(weights.shape) == 2:
#         result = np.mean(weights, axis=0)
#     else:
#         raise print('가중치 잘못됨')

#     return result


# def split_ue_group(UE_weights, UE_NUM): # 각 레이어별 가중치 평균보다 높은지 낮은지 나누는 함수
#     layers = UE_weights[0].keys()
#     concat_weight = {}
#     concat_mean_weight = {}

#     for layer in layers:
#         total = np.zeros(
#             (UE_NUM, UE_weights[0][layer][0].shape[-2:][0], UE_weights[0][layer][0].shape[-2:][1]))
#         for i, UE in enumerate(UE_weights):
#             total[i] = gap(UE[layer][0])
#         concat_weight[layer] = total

#     for layer in concat_weight.keys():
#         concat_mean_weight[layer] = np.mean(concat_weight[layer], axis=0)

#     UE_high_low = {}
#     for layer in concat_weight.keys():
#         true_cnt_list = []
#         for x in range(UE_NUM):
#             high_low = concat_weight[layer][x] > concat_mean_weight[layer]
#             if len(high_low[high_low == True]) >= len(high_low[high_low == False]):
#                 true_cnt_list.append(True)
#             else:
#                 true_cnt_list.append(False)
#         UE_high_low[layer] = true_cnt_list
        
#     result = [0 for _ in range(UE_NUM)]
#     for layer in UE_high_low.keys():
#         for i, x in enumerate(UE_high_low[layer]):
#             if x == True:
#                 result[i] += 1

#     d = dict()
    
#     for i, x in enumerate(result):
#         d[i] = x

#     UE_sort = OrderedDict(sorted(d.items(), key=lambda t:t[1]))
    
#     return gen_ue_cluster(UE_sort)


# def gen_UE_model():
#     model = models.Sequential()
#     model.add(layers.Conv2D(filters=6, kernel_size=(5, 5),
#                             strides=1, activation='sigmoid', input_shape=(32, 32, 1)))
#     model.add(layers.AveragePooling2D(pool_size=2, strides=2))
#     model.add(layers.Activation('tanh'))
#     model.add(layers.Conv2D(filters=16, kernel_size=(5, 5), strides=1, activation='tanh'))
#     model.add(layers.AveragePooling2D(pool_size=2, strides=2))
#     model.add(layers.Activation('sigmoid'))
#     model.add(layers.Conv2D(filters=120, kernel_size=(5, 5), strides=1, activation='tanh'))
#     model.add(layers.Flatten())
#     model.add(layers.Dense(84, activation='tanh'))
#     model.add(layers.Dense(10, activation='softmax'))
#     model.compile(optimizer='SGD',
#                   loss='sparse_categorical_crossentropy', metrics=['accuracy'])  # lr = 0.01

#     return model


# def gen_server_model():
#     server_model = models.Sequential()
#     server_model.add(layers.Conv2D(filters=6, kernel_size=(
#         5, 5), strides=1, activation='sigmoid', input_shape=(32, 32, 1)))
#     server_model.add(layers.AveragePooling2D(pool_size=2, strides=2))
#     server_model.add(layers.Activation('tanh'))
#     server_model.add(layers.Conv2D(filters=16, kernel_size=(5, 5), strides=1, activation='tanh'))
#     server_model.add(layers.AveragePooling2D(pool_size=2, strides=2))
#     server_model.add(layers.Activation('sigmoid'))
#     server_model.add(layers.Conv2D(filters=120, kernel_size=(5, 5), strides=1, activation='tanh'))
#     server_model.add(layers.Flatten())
#     server_model.add(layers.Dense(84, activation='tanh'))
#     server_model.add(layers.Dense(10, activation='softmax'))

#     return server_model

In [3]:
(x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()

# Divide by 255 and zero-pad 28x28 -> 32x32 to match the models' input_shape.
x_train = x_train.reshape(-1, 28, 28, 1) / 255
x_test = x_test.reshape(-1, 28, 28, 1) / 255
x_train = np.pad(x_train, ((0, 0), (2, 2), (2, 2), (0, 0)), 'constant')
x_test = np.pad(x_test, ((0, 0), (2, 2), (2, 2), (0, 0)), 'constant')

# NOTE(review): despite the "train" names these are the *test* samples sorted
# by label, and nothing in the visible cells reads them -- confirm intent.
idx = np.argsort(y_test)
x_train_sorted = x_test[idx]
y_train_sorted = y_test[idx]

UE_NUM = 20          # number of simulated UEs (clients)
TF_SEED = 235511     # seeds tried: 100: good; 12: 5 clusters; 403092, 235511, 130670: 6 clusters; 192150: 20
MIN_SAMPLES = 10000  # smallest data shard handed to a UE
n_train = len(x_train)

UE = [{"x_train": [], "y_train": []} for _ in range(UE_NUM)]

random.seed(45)
# This draw is only printed, but it advances the RNG, so the shard boundaries
# below depend on it -- do not remove.
seed = random.randint(1, 1000000)
print(seed)
print(f'seed : {TF_SEED}')
tf.random.set_seed(TF_SEED)

total = 0

# The global model is evaluated on the full, untouched MNIST test split.
x_eval_dataset = x_test.copy()
y_eval_dataset = y_test.copy()

# Give each UE a random contiguous slice of the training set: a random start
# and a random length of at least MIN_SAMPLES that stays inside the data.
# (Earlier experiments, previously commented out here, used fixed-range
# windows or a 5000-30000 sample count instead.)
for i in range(UE_NUM):
    random_start = random.randint(1, n_train - MIN_SAMPLES)
    data_num = random.randint(MIN_SAMPLES, n_train - random_start)
    print(random_start, random_start + data_num)
    UE[i]['x_train'] = x_train[random_start:random_start + data_num]
    UE[i]['y_train'] = y_train[random_start:random_start + data_num]

# From here on x_train / x_test / y_train / y_test are per-UE lists
# (index = UE), each UE's slice being split 80/20 into train / validation.
x_train, x_test, y_train, y_test = [], [], [], []
for ue_data in UE:
    x_fit, x_val, y_fit, y_val = train_test_split(
        ue_data['x_train'], ue_data['y_train'], test_size=0.2, random_state=45)
    x_train.append(x_fit)
    x_test.append(x_val)
    y_train.append(y_fit)
    y_test.append(y_val)

285083
seed : 235511
27374 53365
16887 32253
19870 40969
1416 16156
31737 42098
7527 36129
7715 38099
48725 58798
40956 52047
8849 36647
48298 58613
13829 51903
4913 55081
26748 41427
45905 56373
2109 23395
21016 39269
23367 58765
47527 58837
6145 43106


In [4]:
# Report how many samples each client received (before the train/validation split).
for client_no, client_data in enumerate(UE, start=1):
    sample_count = len(client_data['x_train'])
    print(f"Client {client_no}'s data amount : {sample_count}")

Client 1's data amount : 25991
Client 2's data amount : 15366
Client 3's data amount : 21099
Client 4's data amount : 14740
Client 5's data amount : 10361
Client 6's data amount : 28602
Client 7's data amount : 30384
Client 8's data amount : 10073
Client 9's data amount : 11091
Client 10's data amount : 27798
Client 11's data amount : 10315
Client 12's data amount : 38074
Client 13's data amount : 50168
Client 14's data amount : 14679
Client 15's data amount : 10468
Client 16's data amount : 21286
Client 17's data amount : 18253
Client 18's data amount : 35398
Client 19's data amount : 11310
Client 20's data amount : 36961


In [5]:
wandb.init(project='Federated Learning (KCI)', name='0930 Simulation base(last)', entity='yhkim')

# SavedModel directory holding the latest global model.
# NOTE(review): if this directory survives from an earlier run, round 1 starts
# from that stale model instead of a fresh one -- delete it for a clean run.
MODEL_DIR = 'fl_model_0930_last'

# `round_idx` rather than `round`, so the builtin round() is not shadowed.
for round_idx in range(100):  # communication round (global epoch)
    # --- Local training on every UE ---
    learning_result_list = []
    for i in range(UE_NUM):
        if os.path.isdir(MODEL_DIR):
            # Start from the latest global model; recompile before fitting.
            model = tf.keras.models.load_model(MODEL_DIR)
            model.compile(optimizer='SGD',
                          loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        else:
            # First round: no global model has been saved yet.
            model = gen_UE_model()
        learning_result_list.append(model.fit(
            x_train[i], y_train[i], batch_size=100, epochs=1,
            validation_data=(x_test[i], y_test[i])))
        # Resets Keras' global state (including auto-generated layer names),
        # which the name-based weight transfer below relies on.
        tf.keras.backend.clear_session()

    # --- Collect each UE's trained weights as {layer name: [kernel, bias]} ---
    UE_weights = []
    for history in learning_result_list:  # fit() returns History objects
        layer_weights = {}
        for trained_layer in history.model.layers:
            weights = trained_layer.get_weights()
            if len(weights) > 0:
                layer_weights[trained_layer.name] = weights
        UE_weights.append(layer_weights)

    # --- Build a fresh server model for this round ---
    server_model = gen_server_model()

    # --- Stack every UE's kernel/bias per layer along a new leading axis ---
    # The loop variable is `ue_layers`, not `UE`, so the global `UE` list from
    # the data-partition cell is no longer overwritten. float64 matches the
    # dtype the previous np.empty/np.append accumulation produced.
    sum_weights = {}
    for layer_name in UE_weights[0].keys():
        sum_weights[layer_name] = {
            'weight': np.stack(
                [ue_layers[layer_name][0] for ue_layers in UE_weights]).astype(np.float64),
            'bias': np.stack(
                [ue_layers[layer_name][1] for ue_layers in UE_weights]).astype(np.float64),
        }

    # --- Federated averaging: plain (unweighted) mean over UEs, matched by layer name ---
    for model_layer in server_model.layers:
        if model_layer.name in sum_weights:
            stacked = sum_weights[model_layer.name]
            model_layer.set_weights([np.mean(stacked['weight'], axis=0),
                                     np.mean(stacked['bias'], axis=0)])

    # --- Evaluate the global model on the held-out evaluation set ---
    server_model.compile(
        optimizer='SGD', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    result = server_model.evaluate(
        x=x_eval_dataset, y=y_eval_dataset, batch_size=100)
    tf.keras.backend.clear_session()
    print('test loss, test acc:', result)
    server_model.save(MODEL_DIR)

    wandb.log(
        {'global accuracy': result[1], 'global loss': result[0], 'global epoch': round_idx + 1})
wandb.finish()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
wandb: Currently logged in as: yhkim (use `wandb login --relogin` to force relogin)
  warn("The `IPython.html` package has been deprecated since IPython 4.0. "


test loss, test acc: [2.3022775650024414, 0.11349999904632568]
test loss, test acc: [2.30184268951416, 0.11349999904632568]


test loss, test acc: [2.301532506942749, 0.11349999904632568]
test loss, test acc: [2.301307201385498, 0.11349999904632568]
test loss, test acc: [2.3011393547058105, 0.11349999904632568]


test loss, test acc: [2.301008462905884, 0.11349999904632568]
test loss, test acc: [2.3008999824523926, 0.11349999904632568]
test loss, test acc: [2.3008062839508057, 0.11349999904632568]


test loss, test acc: [2.3007185459136963, 0.11349999904632568]
test loss, test acc: [2.300633192062378, 0.11349999904632568]
test loss, test acc: [2.3005435466766357, 0.11349999904632568]


test loss, test acc: [2.3004465103149414, 0.11349999904632568]
test loss, test acc: [2.3003363609313965, 0.11349999904632568]
test loss, test acc: [2.3002092838287354, 0.11349999904632568]


test loss, test acc: [2.3000569343566895, 0.11349999904632568]
test loss, test acc: [2.2998733520507812, 0.11349999904632568]
test loss, test acc: [2.299643039703369, 0.11349999904632568]


test loss, test acc: [2.2993526458740234, 0.11349999904632568]
test loss, test acc: [2.2989747524261475, 0.11349999904632568]
test loss, test acc: [2.298469066619873, 0.11349999904632568]


test loss, test acc: [2.297774314880371, 0.11349999904632568]
test loss, test acc: [2.2967798709869385, 0.11349999904632568]
test loss, test acc: [2.295293092727661, 0.11349999904632568]


test loss, test acc: [2.2929320335388184, 0.11349999904632568]
test loss, test acc: [2.288877010345459, 0.11349999904632568]
test loss, test acc: [2.2811219692230225, 0.14949999749660492]


test loss, test acc: [2.263779640197754, 0.23729999363422394]
test loss, test acc: [2.213740348815918, 0.37720000743865967]
test loss, test acc: [2.0282182693481445, 0.46160000562667847]


test loss, test acc: [1.634920597076416, 0.5281000137329102]
test loss, test acc: [1.2921876907348633, 0.6028000116348267]
test loss, test acc: [1.0695911645889282, 0.6685000061988831]


test loss, test acc: [0.9325659871101379, 0.7197999954223633]
test loss, test acc: [0.834548830986023, 0.7554000020027161]
test loss, test acc: [0.7566872835159302, 0.7807999849319458]


test loss, test acc: [0.6915642023086548, 0.8001999855041504]
test loss, test acc: [0.6351833343505859, 0.8167999982833862]
test loss, test acc: [0.5856775045394897, 0.8323000073432922]


test loss, test acc: [0.5425573587417603, 0.8450999855995178]
test loss, test acc: [0.5055515170097351, 0.8551999926567078]
test loss, test acc: [0.4740771949291229, 0.8636000156402588]


test loss, test acc: [0.4473199248313904, 0.8718000054359436]
test loss, test acc: [0.4244084358215332, 0.8791000247001648]
test loss, test acc: [0.40453335642814636, 0.8834999799728394]


test loss, test acc: [0.38701197504997253, 0.8894000053405762]
test loss, test acc: [0.37130972743034363, 0.8952999711036682]
test loss, test acc: [0.35702887177467346, 0.8971999883651733]


test loss, test acc: [0.34388044476509094, 0.9027000069618225]
test loss, test acc: [0.3316550850868225, 0.9057999849319458]
test loss, test acc: [0.3201991319656372, 0.9081000089645386]


test loss, test acc: [0.3093964159488678, 0.9117000102996826]
test loss, test acc: [0.2991584241390228, 0.9143000245094299]
test loss, test acc: [0.2894160747528076, 0.9162999987602234]


test loss, test acc: [0.2801160514354706, 0.9186999797821045]
test loss, test acc: [0.2712171673774719, 0.9207000136375427]
test loss, test acc: [0.26268839836120605, 0.9235000014305115]


test loss, test acc: [0.25450658798217773, 0.9259999990463257]
test loss, test acc: [0.2466544657945633, 0.9282000064849854]
test loss, test acc: [0.23911944031715393, 0.930899977684021]


test loss, test acc: [0.2318917065858841, 0.9330000281333923]
test loss, test acc: [0.22496318817138672, 0.9351000189781189]
test loss, test acc: [0.21832691133022308, 0.9369000196456909]


test loss, test acc: [0.21197640895843506, 0.9386000037193298]
test loss, test acc: [0.20590512454509735, 0.9402999877929688]


test loss, test acc: [0.20010608434677124, 0.9412999749183655]
test loss, test acc: [0.19457218050956726, 0.9434999823570251]
test loss, test acc: [0.1892952024936676, 0.9453999996185303]


test loss, test acc: [0.18426638841629028, 0.946399986743927]
test loss, test acc: [0.17947618663311005, 0.9474999904632568]
test loss, test acc: [0.1749144047498703, 0.9490000009536743]


test loss, test acc: [0.1705702543258667, 0.9498999714851379]
test loss, test acc: [0.16643288731575012, 0.9513000249862671]
test loss, test acc: [0.16249121725559235, 0.9526000022888184]


test loss, test acc: [0.15873438119888306, 0.9537000060081482]
test loss, test acc: [0.15515205264091492, 0.954800009727478]
test loss, test acc: [0.15173426270484924, 0.9559000134468079]


test loss, test acc: [0.14847148954868317, 0.9567999839782715]
test loss, test acc: [0.14535479247570038, 0.9578999876976013]
test loss, test acc: [0.14237573742866516, 0.9592999815940857]


test loss, test acc: [0.1395263373851776, 0.9596999883651733]
test loss, test acc: [0.1367991715669632, 0.9603000283241272]
test loss, test acc: [0.13418716192245483, 0.9617000222206116]


test loss, test acc: [0.13168372213840485, 0.9620000123977661]
test loss, test acc: [0.1292826235294342, 0.9628999829292297]
test loss, test acc: [0.12697818875312805, 0.9642999768257141]


test loss, test acc: [0.12476494163274765, 0.9649999737739563]
test loss, test acc: [0.12263783067464828, 0.9659000039100647]
test loss, test acc: [0.12059221416711807, 0.9666000008583069]


test loss, test acc: [0.11862356215715408, 0.967199981212616]
test loss, test acc: [0.1167277842760086, 0.9674999713897705]
test loss, test acc: [0.11490112543106079, 0.9678999781608582]


test loss, test acc: [0.11313983052968979, 0.9685999751091003]
test loss, test acc: [0.11144069582223892, 0.9689000248908997]
test loss, test acc: [0.10980042815208435, 0.9695000052452087]


test loss, test acc: [0.10821609199047089, 0.9699000120162964]
test loss, test acc: [0.10668496042490005, 0.9699000120162964]
test loss, test acc: [0.10520438104867935, 0.9704999923706055]


test loss, test acc: [0.1037718653678894, 0.9707000255584717]
test loss, test acc: [0.10238524526357651, 0.9710999727249146]
test loss, test acc: [0.1010422334074974, 0.9714000225067139]


0,1
global accuracy,0.9714
global epoch,100.0
global loss,0.10104


0,1
global accuracy,▁▁▁▁▁▁▁▁▁▁▁▃▅▆▇▇▇▇▇▇████████████████████
global epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
global loss,████████████▅▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
