# Flower 프레임워크
- Adap에서 개발한 연합학습 프레임워크
- Pytorch, TensorFlow 등 다양한 딥러닝 프레임워크를 지원
- MNIST데이터셋으로 Pytorch, TensorFlow모델의 연합학습 기초 코드를 실습

# import

In [1]:
#pip install flwr, wrapt, opt_einsum, gast, astunparse, termcolor
#pip install -U flwr["simulation"]

In [1]:
import flwr as fl
import tensorflow as tf
import math
import os

# Tensorflow 로그 출력 줄이기
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
# 동시 9명 연합학습을 위해 cpu 사용
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

2023-08-10 20:55:00.682325: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-08-10 20:55:00.779537: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-08-10 20:55:00.804420: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-08-10 20:55:01.177971: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; 

# TensorFlow 모델

## 데이터 로드
- tensorflow에서 제공하는 mnist데이터 로드

In [2]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

## 모델 정의
- 간단한 MLP 모델을 정의

In [3]:
model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(10, activation='softmax')
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

2023-08-10 20:52:25.977452: E tensorflow/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2023-08-10 20:52:25.977527: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: dilab248
2023-08-10 20:52:25.977540: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: dilab248
2023-08-10 20:52:25.977709: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 470.199.2
2023-08-10 20:52:25.977758: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 470.199.2
2023-08-10 20:52:25.977768: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:310] kernel version seems to match DSO: 470.199.2
2023-08-10 20:52:25.980016: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions 

## 중앙 집중식 모델 학습

In [4]:
history = model.fit(x_train, y_train, epochs=10)
history.history

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


{'loss': [0.3013162612915039,
  0.14687125384807587,
  0.1078963354229927,
  0.09024562686681747,
  0.07741712778806686,
  0.06770208477973938,
  0.06054411455988884,
  0.05311475321650505,
  0.0499182790517807,
  0.0433720126748085],
 'accuracy': [0.9113666415214539,
  0.9563500285148621,
  0.9674999713897705,
  0.972183346748352,
  0.9753999710083008,
  0.9780833125114441,
  0.9804666638374329,
  0.982783317565918,
  0.9832833409309387,
  0.985883355140686]}

In [5]:
output = model.evaluate(x_test,  y_test)
print(f'loss : {output[0]}, accuracy : {output[1]}')

loss : 0.0691157802939415, accuracy : 0.9807000160217285


## 연합학습을 위한 데이터 분할
- 각 클라이언트는 서로 다른 데이터를 가지고 있어야 하기에 데이터를 클라이어트의 수 만큼 분할

In [3]:
# 하나의 리스트를 n개로 분할하는 함수 정의
def list_split(arr, n):
    num = math.ceil(len(arr) / n)
    return [arr[i: i + num] for i in range(0, len(arr), num)]

In [4]:
# 각 클라이언트가 학습하기위한 데이터 분할
# 해당 예제에서는 3개의 클라이언트를 연합학습

x_train_list, y_train_list, x_val_list, y_val_list = map(list_split, (x_train, y_train, x_test, y_test), (3, 3, 3, 3))

In [5]:
for i in range(3):
    print(f'{i}번 클라이언트가 사용할 데이터 shape : {x_train_list[i].shape}')

0번 클라이언트가 사용할 데이터 shape : (20000, 28, 28)
1번 클라이언트가 사용할 데이터 shape : (20000, 28, 28)
2번 클라이언트가 사용할 데이터 shape : (20000, 28, 28)


## 연합학습 클라이언트 클래스(Flower client) 정의
- Flower의 NumPyClient를 상속받는 FlowerClient 클래스 정의
- 클라이언트의 동작을 정의함
    - 모델 파라미터 출력, 적용
    - 학습 함수 정의
    - 검증 함수 정의
    - 학습 및 검증 시 손실값과 정확도 반환

In [6]:
# 클라이언트 클래스에 필수로 있어야 하는 함수
class FlowerClient(fl.client.NumPyClient):
    def get_parameters(self, config):
        # -------------------------------------------------------------------
        # 모델의 파라미터를 반환하는 함수
        # 반환하는 모델의 파라미터는 아래와 같은 구조
        # [1번 레이어 파라미터(numpy.ndarray), 2번 레이어 파라미터(numpy.ndarray), ...]
        # -------------------------------------------------------------------
        return # 모델 파라미터(list)

    def fit(self, parameters, config):
        # -------------------------------------------------------------------
        # 모델의 학습을 정의하는 함수
        # 모델 학습 전에 서버에서 받은 parameters를 모델에 적용해야함
        # 모델 학습 후 모델의 파라미터와 학습데이터의 길이, 서버에서 집계할 모델 성능지표를 반환
        # -------------------------------------------------------------------
        return # 모델 파라미터(list), 학습 데이터 크기(int), 모델 성능지표(dict)

    def evaluate(self, parameters, config):
        # -------------------------------------------------------------------
        # 모델의 검증을 정의하는 함수
        # 모델 검증 전에 서버에서 받은 parameters를 모델에 적용해야함
        # 모델 검증 후 손실값과 검증 데이터의 길이, 서버에서 집계할 모델 성능지표를 반환
        # -------------------------------------------------------------------
        return #손실(float), 검증데이터 크기(int), 모델 성능지표(dict)

### TensorFlow모델의 FlowerClient 예시 코드

In [7]:
class FlowerClient(fl.client.NumPyClient):
    def __init__(self, model, x_train, y_train, x_val, y_val):
        self.model = model
        self.x_train, self.y_train = x_train, y_train
        self.x_val, self.y_val = x_val, y_val

    def get_parameters(self, config):
        return self.model.get_weights() # 모델의 파라미터 반환

    def fit(self, parameters, config):
        self.model.set_weights(parameters) # 서버에서 받은 parameters 모델 적용
        self.model.fit(self.x_train, self.y_train, batch_size=64, epochs=1, verbose=2) # 모델 학습
        return self.model.get_weights(), len(self.x_train), {} # 필수 반환

    def evaluate(self, parameters, config):
        self.model.set_weights(parameters) # 서버에서 받은 parameters 모델 적용
        loss, acc = self.model.evaluate(self.x_val, self.y_val, batch_size=64, verbose=2)
        return loss, len(self.x_val), {"accuracy": acc} # 필수 반환

## 연합학습 코드
- 앞서 정의한 FlowerClient를 fl.client.start_numpy_client 함수를 통해 클라이언트 실행

In [8]:
model_fl = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(10, activation='softmax')
])

model_fl.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

2023-08-10 20:55:30.263569: E tensorflow/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2023-08-10 20:55:30.263670: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: dilab248
2023-08-10 20:55:30.263685: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: dilab248
2023-08-10 20:55:30.263867: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 470.199.2
2023-08-10 20:55:30.263917: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 470.199.2
2023-08-10 20:55:30.263929: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:310] kernel version seems to match DSO: 470.199.2
2023-08-10 20:55:30.265622: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions 

### 같은 폴더에 있는 server 파일을 먼저 실행하고 아래 셀과 clinet1, clinet2 파일을 실행

In [9]:
client_num = 0
flwr_client = FlowerClient(model_fl, x_train_list[client_num], y_train_list[client_num], x_val_list[client_num], y_val_list[client_num])

fl.client.start_numpy_client(server_address="127.0.0.1:8080", client=flwr_client)

INFO flwr 2023-08-10 20:55:39,293 | grpc.py:50 | Opened insecure gRPC connection (no certificates were passed)
DEBUG flwr 2023-08-10 20:55:39,299 | connection.py:39 | ChannelConnectivity.IDLE
DEBUG flwr 2023-08-10 20:55:39,303 | connection.py:39 | ChannelConnectivity.READY


313/313 - 1s - loss: 0.5243 - accuracy: 0.8513 - 1s/epoch - 4ms/step
53/53 - 0s - loss: 0.3288 - accuracy: 0.9085 - 180ms/epoch - 3ms/step
313/313 - 1s - loss: 0.2646 - accuracy: 0.9234 - 1s/epoch - 3ms/step
53/53 - 0s - loss: 0.2457 - accuracy: 0.9271 - 124ms/epoch - 2ms/step
313/313 - 1s - loss: 0.2099 - accuracy: 0.9391 - 1s/epoch - 3ms/step
53/53 - 0s - loss: 0.2019 - accuracy: 0.9403 - 104ms/epoch - 2ms/step
313/313 - 1s - loss: 0.1762 - accuracy: 0.9474 - 1s/epoch - 3ms/step
53/53 - 0s - loss: 0.1778 - accuracy: 0.9466 - 107ms/epoch - 2ms/step
313/313 - 1s - loss: 0.1537 - accuracy: 0.9543 - 1s/epoch - 3ms/step
53/53 - 0s - loss: 0.1582 - accuracy: 0.9541 - 111ms/epoch - 2ms/step
313/313 - 1s - loss: 0.1347 - accuracy: 0.9608 - 1s/epoch - 3ms/step
53/53 - 0s - loss: 0.1465 - accuracy: 0.9577 - 118ms/epoch - 2ms/step
313/313 - 1s - loss: 0.1229 - accuracy: 0.9643 - 1s/epoch - 3ms/step
53/53 - 0s - loss: 0.1323 - accuracy: 0.9607 - 102ms/epoch - 2ms/step
313/313 - 1s - loss: 0.1101

DEBUG flwr 2023-08-10 20:56:02,240 | connection.py:113 | gRPC channel closed
INFO flwr 2023-08-10 20:56:02,241 | app.py:185 | Disconnect and shut down
