In [1]:
from google.colab import drive
drive.mount('/content/drive')
!pip install portpicker==1.5.2
!pip install numpy==1.25
!pip install --quiet --upgrade tensorflow-federated
!pip install fastapi==0.103.2
!pip install kaleido
!pip install python-multipart
!pip install uvicorn

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.7/73.7 MB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m50.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m33.7/33.7 MB[0m [31m43.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m241.2/241.2 kB[0m [31m27.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m323.2/323.2 kB[0m [31m36.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m489.9/489.9 MB[0m [31

In [2]:
!unzip '/content/drive/MyDrive/new_datasets.zip'

[1;30;43m串流輸出內容已截斷至最後 5000 行。[0m
  inflating: new_datasets/adclicer/VirusShare_12cbc2ae859b5c2581cfcaaaab49ce20_184.png  
  inflating: new_datasets/adclicer/VirusShare_12cbc2ae859b5c2581cfcaaaab49ce20_185.png  
  inflating: new_datasets/adclicer/VirusShare_12cbc2ae859b5c2581cfcaaaab49ce20_19.png  
 extracting: new_datasets/adclicer/VirusShare_12cbc2ae859b5c2581cfcaaaab49ce20_2.png  
  inflating: new_datasets/adclicer/VirusShare_12cbc2ae859b5c2581cfcaaaab49ce20_20.png  
  inflating: new_datasets/adclicer/VirusShare_12cbc2ae859b5c2581cfcaaaab49ce20_21.png  
  inflating: new_datasets/adclicer/VirusShare_12cbc2ae859b5c2581cfcaaaab49ce20_22.png  
  inflating: new_datasets/adclicer/VirusShare_12cbc2ae859b5c2581cfcaaaab49ce20_23.png  
  inflating: new_datasets/adclicer/VirusShare_12cbc2ae859b5c2581cfcaaaab49ce20_24.png  
  inflating: new_datasets/adclicer/VirusShare_12cbc2ae859b5c2581cfcaaaab49ce20_25.png  
  inflating: new_datasets/adclicer/VirusShare_12cbc2ae859b5c2581cfcaaaab49ce20_26.pn

In [3]:
import tensorflow as tf
from tensorflow.keras import layers
import tensorflow_federated as tff
import numpy as np
import os
from tensorflow.keras import regularizers
import random
from PIL import Image
import sys
sys.path.append('/content/new_datasets')

模型宣告

In [4]:
def build_model(num_classes):
    model = tf.keras.Sequential()

    # C1 卷積層
    model.add(layers.Conv2D(32, kernel_size=(3, 3), strides=(1, 1), activation='relu', input_shape=(28, 28, 1)))
    model.add(layers.BatchNormalization())

    # P1 池化層
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'))

    # C2 卷積層
    model.add(layers.Conv2D(64, kernel_size=(3, 3), strides=(1, 1), activation='relu'))
    model.add(layers.BatchNormalization())

    # P2 池化層
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'))

    # C3 卷積層
    model.add(layers.Conv2D(128, kernel_size=(3, 3), strides=(1, 1), activation='relu'))
    model.add(layers.BatchNormalization())

    # P3 池化層
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid'))

    # 將卷積網路的輸出展平，以便輸入全連接層
    model.add(layers.Flatten())

    # FC1 帶L2正則化的全連接層
    model.add(layers.Dense(units=1024, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
    model.add(layers.BatchNormalization())

    # 加入Dropout以防止過擬合
    model.add(layers.Dropout(0.5))

    # FC2 帶L2正則化的輸出層
    model.add(layers.Dense(units=num_classes, activation='softmax', kernel_regularizer=regularizers.l2(0.001)))

    return model

In [5]:
def load_data(folder, class_labels):
    # 加載指定文件夾的數據
    data = []
    labels = []

    for folder_name in os.listdir(folder):
        folder_path = os.path.join(folder, folder_name)
        if os.path.isdir(folder_path):
            for filename in os.listdir(folder_path):
                if filename.endswith(".png"):
                    file_path = os.path.join(folder_path, filename)
                    image = read_images_from_png(file_path)
                    data.append(image)
                    labels.append(class_labels[folder_name])

    # 將數據和標籤轉換為 NumPy 數組
    data = np.array(data, dtype=np.uint8).reshape(-1, 28, 28, 1) / 255.0
    labels = np.array(labels)

    # 打亂數據集
    dataset = tf.data.Dataset.from_tensor_slices((data, labels))
    dataset = dataset.shuffle(buffer_size=len(data)).batch(32)

    # 預取數據以加快訓練速度
    dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    return dataset

def get_class_labels(folders):
    # 初始化一個空的字典來存儲類別標籤
    class_labels = {}
    for folder in folders:
        # 從資料夾路徑中提取類別名稱
        classname = os.path.basename(folder)
        # 將整數標籤分配給類別名稱
        class_labels[classname] = len(class_labels)
    return class_labels

def read_images_from_png(file_path):
    # 打開圖片文件
    image = Image.open(file_path)
    # 轉換為灰階
    image = image.convert('L')
    # 調整大小到 28x28
    image = image.resize((28, 28))
    # 將圖片數據轉換為數組
    image_data = np.array(image)
    return image_data

def create_federated_model():
    return tff.learning.models.from_keras_model(
        keras_model=build_model(num_classes),
        input_spec=federated_data.create_tf_dataset_for_client(federated_data.client_ids[0]).element_spec,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

In [6]:
class MyClientData(tff.simulation.datasets.TestClientData):

    def __init__(self, client_data):
        self.client_data = client_data

    def create_tf_dataset_for_client(self, client_id):
        return self.client_data[client_id]

    @property
    def client_ids(self):
        return list(self.client_data.keys())

In [7]:
# 設定基礎資料夾路徑
base_folder = "/content/new_datasets"
data_folders = []

# 獲取基礎資料夾中的所有子資料夾
subfolders = [os.path.join(base_folder, d) for d in os.listdir(base_folder) if os.path.isdir(os.path.join(base_folder, d))]
data_folders.extend(subfolders)

# 獲取類別標籤並計算類別數量
class_labels = get_class_labels(data_folders)
num_classes = len(class_labels)
print(f"類別數量: {num_classes}")

# 為每個客戶端加載相同的數據
num_clients = 3  # 你希望創建的客戶端數量
client_datasets = {}
base_folders = ["/content/new_datasets"] * num_clients

for i in range(num_clients):
    data_folders = [os.path.join(base_folders[i], d) for d in os.listdir(base_folders[i]) if os.path.isdir(os.path.join(base_folders[i], d))]
    class_labels = get_class_labels(data_folders)
    client_datasets[f'client_{i}'] = load_data(base_folders[i], class_labels)

# 印出客戶端數據集
print(client_datasets)

# 創建聯邦數據
federated_data = MyClientData(client_datasets)

類別數量: 5
{'client_0': <_PrefetchDataset element_spec=(TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float64, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))>, 'client_1': <_PrefetchDataset element_spec=(TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float64, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))>, 'client_2': <_PrefetchDataset element_spec=(TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float64, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))>}


In [8]:
num_clients_per_round=3

iterative_process = tff.learning.algorithms.build_unweighted_fed_avg(
    model_fn=create_federated_model,
    client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.02),
    server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0))

Instructions for updating:
Colocations handled automatically by placer.


In [9]:
# 初始化聯邦學習過程的狀態
state = iterative_process.initialize()

# 定義聯邦學習的總輪數
num_rounds = 50
# 定義每輪訓練中的客戶端數量
num_clients_per_round = 3

for round_num in range(1, num_rounds + 1):
    # 隨機選擇當前輪次的客戶端
    selected_clients = np.random.choice(federated_data.client_ids, size=num_clients_per_round, replace=False)

    # 為選定的客戶端創建聯邦訓練數據集
    federated_train_data = [federated_data.create_tf_dataset_for_client(x) for x in selected_clients]

    # 執行下一輪的聯邦學習過程
    state, metrics = iterative_process.next(state, federated_train_data)

    # 打印當前輪次的度量結果
    print('Round {:2d}, Metrics: {}'.format(round_num, metrics))


Round  1, Metrics: OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('sparse_categorical_accuracy', 0.24382071), ('loss', 2.1597788), ('num_examples', 45960), ('num_batches', 1437)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_count', ())])), ('finalizer', OrderedDict([('update_non_finite', 0)]))])
Round  2, Metrics: OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('sparse_categorical_accuracy', 0.26048738), ('loss', 1.8369081), ('num_examples', 45960), ('num_batches', 1437)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_count', ())])), ('finalizer', OrderedDict([('update_non_finite', 0)]))])
Round  3, Metrics: OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('sparse_categorical_accuracy', 0.27330288), ('loss', 1.7398376), ('num_examples', 45960), ('num_batches', 1437)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_count', ())]))