In [None]:
pip install TensorFlow-privacy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting TensorFlow-privacy
  Downloading tensorflow_privacy-0.8.7-py3-none-any.whl (301 kB)
[K     |████████████████████████████████| 301 kB 17.7 MB/s 
[?25hCollecting dp-accounting==0.3.0
  Downloading dp_accounting-0.3.0-py3-none-any.whl (89 kB)
[K     |████████████████████████████████| 89 kB 2.8 MB/s 
[?25hCollecting tensorflow-probability==0.15.0
  Downloading tensorflow_probability-0.15.0-py2.py3-none-any.whl (5.7 MB)
[K     |████████████████████████████████| 5.7 MB 34.3 MB/s 
Collecting TensorFlow-privacy
  Downloading tensorflow_privacy-0.8.6-py3-none-any.whl (301 kB)
[K     |████████████████████████████████| 301 kB 51.7 MB/s 
[?25h  Downloading tensorflow_privacy-0.8.5-py3-none-any.whl (287 kB)
[K     |████████████████████████████████| 287 kB 47.3 MB/s 
  Downloading tensorflow_privacy-0.8.4-py3-none-any.whl (287 kB)
[K     |████████████████████████████████| 287 kB 60.

In [None]:
#Import Libraries
from __future__ import print_function
import argparse
import numpy as np
import random
import torch.nn as nn
import torch.optim as optim
import torch
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision import transforms
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from torch.autograd import Variable
import copy
from tensorflow_privacy.privacy.analysis.compute_noise_from_budget_lib import compute_noise

# **学習モデル定義**

In [None]:
class MyNet(nn.Module):
    """Two-convolution CNN that returns log-probabilities over 10 classes.

    The flatten step assumes 320 features per sample after the second pooling
    stage, which is what a 1x28x28 input produces (20 channels of 4x4).
    """

    def __init__(self):
        super().__init__()
        # Layers are registered in the same order as before so that parameter
        # initialisation and state_dict key order are unchanged.
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()  # channel-wise dropout after conv2
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Map a batch of images to per-class log-probabilities."""
        # Stage 1: convolution -> 2x2 max-pool -> ReLU
        features = self.conv1(x)
        features = F.max_pool2d(features, 2)
        features = F.relu(features)

        # Stage 2: convolution -> dropout -> 2x2 max-pool -> ReLU
        features = self.conv2(features)
        features = self.conv2_drop(features)
        features = F.max_pool2d(features, 2)
        features = F.relu(features)

        # Classifier head: flatten -> FC + ReLU -> dropout -> FC
        flat = features.view(-1, 320)
        hidden = F.relu(self.fc1(flat))
        hidden = F.dropout(hidden, training=self.training)
        logits = self.fc2(hidden)

        # log_softmax yields log-probabilities (not probabilities).
        return F.log_softmax(logits, dim=1)


In [None]:
#成功
class Net2(nn.Module):
    """Two 3x3 convolutions followed by two linear layers; returns raw logits.

    NOTE(review): there is no activation between any of the layers, so the
    whole network is a composition of linear/affine maps.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 3)    # 1x28x28  -> 32x26x26
        self.conv2 = nn.Conv2d(32, 64, 3)   # 32x26x26 -> 64x24x24
        self.fc1 = nn.Linear(24 * 24 * 64, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return unnormalised class scores (logits) for a batch of images."""
        feature_maps = self.conv2(self.conv1(x))
        # Flatten every sample to the 24*24*64 vector fc1 expects.
        flattened = feature_maps.view(-1, 24 * 24 * 64)
        return self.fc2(self.fc1(flattened))

# **クライアント側動作コード**

In [None]:
class Client(nn.Module):
    """Client of the federated-learning framework.

    Per round a client:
        1. receives the global model from the server,
        2. trains it on its local data (plain Adam, or DP-SGD when ``DP == 1``),
        3. exposes the trained model in ``self.model`` for the server to aggregate.
    """

    def __init__(self, train_d, test_d, model, E, B, sigma, lr, momentum, clip, num_label, datasize_label, q, DP):
        """Build the local dataset and store the training configuration.

        Args:
            train_d: dataset object exposing image and label tensors as
                ``data``/``targets`` (or the legacy ``train_data``/``train_labels``).
            test_d: dataset of ``(image, label)`` pairs used by :meth:`test`.
            model: initial model; the client trains a private deep copy of it.
            E: number of local epochs per call to :meth:`solo_learn`.
            B: mini-batch size for training and evaluation.
            sigma: DP noise multiplier.
            lr: learning rate of the local Adam optimizer.
            momentum: stored only; the Adam optimizer does not use it.
            clip: L2 bound every per-example gradient is clipped to when DP is on.
            num_label: list of label groups; ``num_label[i]`` are the labels
                sampled with ``datasize_label[i]`` images each.
            datasize_label: number of images to draw per label, one entry per group.
            q: sampling rate used to rescale the noisy gradient sum.
            DP: ``1`` enables DP-SGD; any other value trains without it.
        """
        super(Client, self).__init__()
        self.train_dataset = []
        self.test_data = test_d
        # Each client owns its own copy. Sharing one model object between
        # clients would make them train sequentially on the same weights and
        # turn the server-side average into a no-op.
        self.model = copy.deepcopy(model)
        self.epoch = E
        self.batch = B
        self.sigma = sigma    # DP noise level
        self.lr = lr
        self.momentum = momentum
        self.clip = clip
        self.data_pytorch(train_d, num_label, datasize_label)
        self.q = q
        self.DP = DP

    def data_pytorch(self, train_d, num_label, datasize_label):
        """Sample the local training set and append it to ``self.train_dataset``.

        For every label listed in ``num_label[i]``, ``datasize_label[i]`` images
        of that label are drawn at random (fewer if the label has fewer images),
        scaled from [0, 255] to [0, 1] and stored as ``[image, label]`` pairs.
        """
        # `train_labels` / `train_data` are deprecated aliases in torchvision;
        # prefer the current attribute names and fall back for older datasets.
        all_labels = getattr(train_d, 'targets', None)
        if all_labels is None:
            all_labels = train_d.train_labels
        all_images = getattr(train_d, 'data', None)
        if all_images is None:
            all_images = train_d.train_data

        picked_labels = []
        picked_images = []
        for i, data_counts in enumerate(datasize_label):
            for j in num_label[i]:
                # Select every image of label j ...
                idx = all_labels == j
                label_subset = all_labels[idx]
                image_subset = all_images[idx] / 255
                # ... then keep a random subset of the requested size.
                chosen = np.random.permutation(len(label_subset))[:data_counts]
                picked_labels.append(label_subset[chosen])
                picked_images.append(image_subset[chosen])

        if not picked_labels:
            # Nothing requested: leave the dataset untouched (torch.cat would
            # raise on an empty list).
            return

        labels = torch.cat(picked_labels, dim=0)
        images = torch.cat(picked_images, dim=0)
        self.train_dataset.extend([image, label] for image, label in zip(images, labels))

    def gaussian_noise(self, data_shape, s, sigma):
        """Return N(0, (sigma * s)^2) noise of shape ``data_shape``.

        ``s`` is the sensitivity (clipping bound); it may be a Python number or
        a zero-dimensional tensor.
        """
        std = float(sigma * s)
        if std == 0.0:
            return torch.zeros(tuple(data_shape))
        return torch.normal(0.0, std, size=tuple(data_shape))

    def test(self):
        """Evaluate ``self.model`` on the test data and print loss / accuracy.

        Leaves the model in eval mode.
        """
        self.model.eval()
        test_loss = 0.0
        correct = 0

        test_data = DataLoader(self.test_data, batch_size=self.batch, shuffle=False)
        with torch.no_grad():
            for data, target in test_data:
                output = self.model(data)
                # cross_entropy applies log_softmax itself, so it is correct for
                # raw logits (Net2) and unchanged for log-probabilities (MyNet);
                # nll_loss on raw logits produced meaningless negative losses.
                test_loss += F.cross_entropy(output, target, reduction='sum').item()
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()

        test_loss /= len(test_data.dataset)
        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, correct, len(test_data.dataset),
            100. * correct / len(test_data.dataset)))

    def _dp_backward(self, data, label, criterion):
        """Fill ``param.grad`` with a clipped, noised gradient for one batch.

        Standard DP-SGD: every example's gradient is clipped to L2 norm
        ``self.clip``, the clipped gradients are summed, Gaussian noise with
        standard deviation ``self.sigma * self.clip`` is added to the sum, and
        the result is divided by the expected lot size ``len(train) * q``.

        NOTE(review): one forward/backward pass per example; layers that need
        batch statistics (e.g. BatchNorm in train mode) will not work here.
        NOTE(review): the divisor equals the batch size only when
        ``q == B / len(train_dataset)`` - confirm the configured ``q``.

        Returns:
            float: mean per-example loss of the batch.
        """
        named_params = list(self.model.named_parameters())
        grad_sum = {name: torch.zeros_like(param) for name, param in named_params}
        loss_sum = 0.0

        for sample, target in zip(data, label):
            self.model.zero_grad()
            sample_loss = criterion(self.model(sample.unsqueeze(0)), target.unsqueeze(0))
            sample_loss.backward()
            # Bound the L2 sensitivity of this single example.
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=self.clip)
            for name, param in named_params:
                if param.grad is not None:
                    grad_sum[name] += param.grad
            loss_sum += sample_loss.item()

        lot_size = len(self.train_dataset) * self.q
        for name, param in named_params:
            noise = self.gaussian_noise(grad_sum[name].shape, self.clip, self.sigma)
            noisy_sum = grad_sum[name] + noise.to(grad_sum[name].device)
            # A fresh tensor per batch: nothing is carried over between batches.
            param.grad = noisy_sum / lot_size

        return loss_sum / max(len(data), 1)

    def solo_learn(self):
        """Run ``self.epoch`` epochs of local training on ``self.train_dataset``.

        Prints the test metrics of the received model first, then the loss of
        the last batch of every epoch.
        """
        train_data = DataLoader(self.train_dataset, batch_size=self.batch, shuffle=True)
        # Evaluate before switching to train mode: test() puts the model in
        # eval mode, so calling it after train() would disable dropout for the
        # whole training run.
        self.test()
        self.model.train()
        criterion = nn.CrossEntropyLoss()
        # Optimise this client's own model (not a module-level global).
        optimizer = torch.optim.Adam(params=self.model.parameters(), lr=self.lr)

        for epoch in range(self.epoch):
            last_loss = None

            for data, label in train_data:
                # Add the channel axis. reshape(-1, ...) also handles a final
                # batch that is smaller than self.batch.
                data = data.reshape(-1, 1, 28, 28)

                optimizer.zero_grad()
                if self.DP == 1:
                    last_loss = self._dp_backward(data, label, criterion)
                else:
                    loss = criterion(self.model(data), label)
                    loss.backward()
                    last_loss = loss.item()

                for param in self.model.parameters():
                    assert param.grad is None or not torch.isnan(param.grad).any(), "Grad has nan"

                # update local model
                optimizer.step()

            if last_loss is not None:
                print('Train Epoch: %d \tLoss: %f' % (epoch, last_loss))

# **サーバー側動作コード**

In [None]:
class Server(nn.Module):
    """Server of the federated-learning framework.

    1. Receives the locally trained models from the clients,
    2. aggregates them (FedAvg with equal weights),
    3. stores the result as the global model and broadcasts it to the clients.
    """

    def __init__(self, fl_param):
        """Read the run configuration, derive the DP noise level, create clients.

        Args:
            fl_param: dict with the keys ``N``, ``E``, ``B``, ``num_label``,
                ``num_round``, ``num_data``, ``model``, ``lr``, ``momentum``,
                ``test_data``, ``train_data``, ``q``, ``epsilon``, ``delta``,
                ``clip`` and ``DP``.
        """
        super(Server, self).__init__()
        self.client_num = fl_param['N']          # number of participating clients
        self.epoch = fl_param['E']               # local epochs per client and round
        self.batch = fl_param['B']               # client batch size
        self.num_label = fl_param['num_label']   # label groups held by a client
        self.fl_round = fl_param['num_round']    # number of FL rounds
        self.num_data = fl_param['num_data']     # total images per label group
        self.global_model = fl_param['model']    # global (parent) model
        self.lr = fl_param['lr']                 # learning rate
        self.momentum = fl_param['momentum']     # momentum value forwarded to the clients
        self.test_data = fl_param['test_data']   # evaluation data
        # Images per individual label = group total / number of labels in the group.
        self.datasize_label = [
            int(self.num_data[i] / len(self.num_label[i]))
            for i in range(len(self.num_data))
        ]

        # Derive the noise multiplier for the requested (epsilon, delta) budget.
        # Background on the parameters:
        #   https://mukulrathi.com/privacy-preserving-machine-learning/deep-learning-differential-privacy/
        #   https://opacus.ai/api/compute_dp_sgd_privacy.html
        #   https://github.com/tensorflow/privacy/blob/052f9a31284ffd0fd1ac5ae928be017d1ae14c59/tensorflow_privacy/privacy/analysis/compute_noise_from_budget_lib.py#L23
        self.sigma = compute_noise(1, fl_param['q'], fl_param['epsilon'], fl_param['E']*fl_param['num_round'], fl_param['delta'], 1e-5)
        print("DPノイズ：",self.sigma)

        # Every client is constructed with a reference to the same global model
        # object and the same label / data-size configuration.
        self.clients = [
            Client(
                fl_param['train_data'],
                fl_param['test_data'],
                self.global_model,
                fl_param['E'],
                fl_param['B'],
                self.sigma,
                fl_param['lr'],
                fl_param['momentum'],
                fl_param['clip'],
                fl_param['num_label'],
                self.datasize_label,
                fl_param['q'],
                fl_param['DP'],
            )
            for _ in range(self.client_num)
        ]

    def aggregate_weights(self):
        """FedAvg: load the equal-weight mean of all client models into the global model.

        Returns:
            A shallow copy of the updated global ``state_dict``.
        """
        client_states = [self.clients[idx].model.state_dict() for idx in range(self.client_num)]
        averaged = {}
        for name, reference in client_states[0].items():
            # Average on the device of the parameter itself instead of
            # allocating CPU float32 zeros.
            tensors = [state[name].to(reference.device) for state in client_states]
            if not reference.is_floating_point():
                # Integer buffers cannot be averaged directly.
                tensors = [t.float() for t in tensors]
            averaged[name] = torch.stack(tensors).mean(dim=0).to(dtype=reference.dtype)
        self.global_model.load_state_dict(averaged)
        return self.global_model.state_dict().copy()

    def test_acc(self):
        """Evaluate the global model on the test data and print loss / accuracy."""
        self.global_model.eval()
        test_loss = 0.0
        correct = 0

        test_data = DataLoader(self.test_data, batch_size=self.batch, shuffle=False)
        with torch.no_grad():
            for data, target in test_data:
                output = self.global_model(data)
                # cross_entropy is valid for raw logits and for log-probabilities;
                # nll_loss on raw logits gave meaningless negative losses.
                test_loss += F.cross_entropy(output, target, reduction='sum').item()
                pred = output.argmax(dim=1, keepdim=True)
                correct += pred.eq(target.view_as(pred)).sum().item()

        # Normalise by the real dataset size rather than a hard-coded 10000.
        test_loss /= len(test_data.dataset)

        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, correct, len(test_data.dataset),
            100. * correct / len(test_data.dataset)))

    def distribute(self):
        """Give every client its own deep copy of the current global model."""
        for idx in range(self.client_num):
            self.clients[idx].model = copy.deepcopy(self.global_model)

    def global_learning(self):
        """Run all FL rounds: local training, aggregation, evaluation, broadcast.

        Returns:
            tuple: (global model, model of the first client, DP noise multiplier).
        """
        for i in range(1, self.fl_round + 1):
            print("==== FLround:",i,"times ====\n")
            for j in range(self.client_num):
                print("Client",j + 1,"learning_start")
                self.clients[j].solo_learn()
                print("\nClient",j +1 ,":learning_finish\n")

            print("==== Global_update ====")
            self.aggregate_weights()
            self.test_acc()
            self.distribute()

        return self.global_model, self.clients[0].model, self.sigma

# **学習データをダウンロード**

In [None]:
# Preprocessing applied to every image: convert it to a tensor.
transform = transforms.Compose([
    transforms.ToTensor()
])

# Training split.
train_data = MNIST(
    './data',               # where the dataset is stored
    train=True,             # load the training split
    download=True,          # download if the files are missing
    transform=transform,    # the pipeline above (was the `transforms` module by mistake)
)

# Evaluation split.
test_data = MNIST(
    './data',
    train=False,
    download=True,          # lets this statement work even without the files on disk
    transform=transform,
)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



# **FL学習**

In [None]:
# Model architecture handed to the server as the global model.
model = Net2()

# Run configuration consumed by Server (and forwarded to every Client).
# NOTE(review): with B=50 and 2500 images per client a per-step sampling rate
# would be 0.02; confirm that q=0.001 is intended.
fl_param = dict(
    N=2,                                        # number of clients
    E=3,                                        # local epochs per client and round
    B=50,                                       # client batch size
    num_round=2,                                # number of FL rounds
    num_label=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]],  # labels held by each client
    num_data=[2500],                            # total images per client
    q=0.001,                                    # DP: sampling rate
    epsilon=6,                                  # DP: privacy budget epsilon
    delta=1e-10,                                # DP: delta
    clip=10,                                    # DP: clipping parameter
    model=model,
    lr=0.001,                                   # learning rate
    momentum=0.05,                              # SGD momentum (a moving average of the gradients)
    train_data=train_data,
    test_data=test_data,
    DP=0,                                       # 1: DP enabled, 0: disabled
)

In [None]:
# Build the server; its constructor computes and prints the DP noise
# multiplier and creates the N clients.
fl_entity = Server(fl_param)

DP-SGD with sampling rate = 0.1% and noise_multiplier = 0.612624020347781 iterated over 6000 steps satisfies differential privacy with eps = 6 and delta = 1e-10.
DPノイズ： 0.612624020347781




In [None]:
# Run all FL rounds. Returns the global model, the first client's model and
# the DP noise multiplier.
nowmodel,C_model,noise = fl_entity.global_learning()

==== FLround: 1 times ====

Client 1 learning_start





Test set: Average loss: -0.0060, Accuracy: 1039/10000 (10%)

Train Epoch: 0 	Loss: 0.294097
Train Epoch: 1 	Loss: 0.577365
Train Epoch: 2 	Loss: 0.294585

Client 1 :learning_finish

Client 2 learning_start

Test set: Average loss: -9.8790, Accuracy: 8730/10000 (87%)

Train Epoch: 0 	Loss: 0.453480
Train Epoch: 1 	Loss: 0.467464
Train Epoch: 2 	Loss: 0.247296

Client 2 :learning_finish

==== Global_update ====

Test set: Average loss: -10.0869, Accuracy: 8895/10000 (89%)

==== FLround: 2 times ====

Client 1 learning_start

Test set: Average loss: -10.0869, Accuracy: 8895/10000 (89%)

Train Epoch: 0 	Loss: 0.534701
Train Epoch: 1 	Loss: 0.223861
Train Epoch: 2 	Loss: 0.330210

Client 1 :learning_finish

Client 2 learning_start

Test set: Average loss: -10.0869, Accuracy: 8895/10000 (89%)

Train Epoch: 0 	Loss: 0.153093
Train Epoch: 1 	Loss: 0.126592
Train Epoch: 2 	Loss: 0.252315

Client 2 :learning_finish

==== Global_update ====

Test set: Average loss: -10.0869, Accuracy: 8895/10000

# **モデルの保存**

In [None]:
# Colab only: mount Google Drive so the trained models can be saved there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
model_save_name1 = 'last.pth' # file name for the saved global model (earlier run tag: q0.001E4.6D10:2nd)

In [None]:
model_save_name2 = 'C_last.pth' # file name for the saved client model (earlier run tag: q0.001E4.6D10:2nd)

In [None]:
path1 = F"/content/drive/MyDrive/DEEP_FL/モデル/{model_save_name1}" # destination path for the global model

In [None]:
path2 = F"/content/drive/MyDrive/DEEP_FL/モデル/{model_save_name2}" # destination path for the client model

In [None]:
torch.save(nowmodel.state_dict(), path1) # save the global model's weights

In [None]:
torch.save(C_model.state_dict(), path2) # save the first client's model weights