<a href="https://colab.research.google.com/github/lowbee2019/Algorithm/blob/master/MyModel-v0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow-federated

Collecting tensorflow-federated
[?25l  Downloading https://files.pythonhosted.org/packages/89/ed/fb2ea1b442efcd11303e5154efe397927910a02228952c8bdaa0835739bc/tensorflow_federated-0.18.0-py2.py3-none-any.whl (578kB)
[K     |████████████████████████████████| 583kB 8.6MB/s 
Collecting tensorflow-addons~=0.12.0
[?25l  Downloading https://files.pythonhosted.org/packages/74/e3/56d2fe76f0bb7c88ed9b2a6a557e25e83e252aec08f13de34369cd850a0b/tensorflow_addons-0.12.1-cp37-cp37m-manylinux2010_x86_64.whl (703kB)
[K     |████████████████████████████████| 706kB 16.6MB/s 
Collecting tensorflow-privacy~=0.5.0
[?25l  Downloading https://files.pythonhosted.org/packages/41/ae/7db0dcf76a746314a174578a7b99ff098b40b908c4c693a955a2bbc0127b/tensorflow_privacy-0.5.1-py3-none-any.whl (149kB)
[K     |████████████████████████████████| 153kB 29.1MB/s 
Collecting attrs~=19.3.0
  Downloading https://files.pythonhosted.org/packages/a2/db/4313ab3be961f7a763066401fb77f7748373b6094076ae2bda2806988af6/attrs-19.3.0-py

In [None]:
import tensorflow as tf
import numpy as np
from tensorflow import keras
import collections
import tensorflow_federated as tff
import copy
import random

# Server Initialization

In [None]:
class ParamServer(object):
  """Central parameter server that pre-trains and evaluates a shared model.

  The server only holds the Keras compile settings; the model itself is
  supplied by the caller on every call.
  """

  def __init__(self,init_model_path):
    """Store the initial-model path and the compile configuration.

    Args:
      init_model_path: Path associated with the initial model. It is only
        stored on the instance; no method of this class reads it.
    """
    self.init_model_path = init_model_path
    self.round = 0  # initialised to 0; not updated anywhere in this class
    self.optimizer="adam"
    self.loss = "sparse_categorical_crossentropy"
    self.metrics =['accuracy']

  def _compile(self,model):
    """Compile `model` with the server's optimizer, loss and metrics."""
    model.compile(optimizer=self.optimizer,
                  loss=self.loss,
                  metrics=self.metrics)

  def PreTrain(self,model,data,labels,batch_size=32,epochs=5):
    """Compile `model` and fit it on the server-side data.

    Args:
      model: Object exposing Keras-style `compile` and `fit`.
      data: Training inputs forwarded to `fit`.
      labels: Training targets forwarded to `fit`.
      batch_size: Mini-batch size forwarded to `fit`.
      epochs: Number of epochs forwarded to `fit`.

    Returns:
      The same `model` object, trained in place.
    """
    self._compile(model)
    # Keyword arguments keep batch_size/epochs bound to the right parameters
    # regardless of the positional order of `fit`.
    model.fit(data,labels,batch_size=batch_size,epochs=epochs)
    return model

  def PreEval(self,model,test_data,test_labels):
    """Compile `model` and evaluate it on the given test data.

    Args:
      model: Object exposing Keras-style `compile` and `evaluate`.
      test_data: Evaluation inputs.
      test_labels: Evaluation targets.

    Returns:
      Whatever `model.evaluate` returns, so callers can use the metrics
      programmatically instead of only reading the printed progress bar.
    """
    self._compile(model)
    return model.evaluate(test_data,test_labels)


## Mnist数据集的预处理

In [None]:
# Fetch MNIST; load_data() yields a (train, test) pair of (images, labels) pairs.
mnist = tf.keras.datasets.mnist
train,test = mnist.load_data()
(x_train,y_train),(x_test,y_test) = train,test
# Scale pixel values by 1/255. Division creates new arrays, so `train` and
# `test` still reference the unscaled data.
x_train = x_train/255.0
x_test = x_test/255.0

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [None]:
# Inspect the shape of the training image array
x_train.shape

(60000, 28, 28)

## 模型的定义

In [None]:
class FLModel(tf.keras.Model):
  """Fully connected classifier: Flatten -> Dense(128, relu) -> Dropout(0.2) -> Dense(num_classes, softmax).

  Args:
    row: Height of one input sample.
    col: Width of one input sample.
    num_classes: Size of the softmax output layer.
    *args, **kwargs: Forwarded to `tf.keras.Model.__init__`.
  """

  def __init__(self,row,col,num_classes=62,*args,**kwargs):
    super(FLModel,self).__init__(*args,**kwargs)
    # Kept on the instance so copy() can rebuild an identical architecture.
    self.row = row
    self.col = col
    self.num_classes = num_classes
    self.flatten = tf.keras.layers.Flatten(input_shape=(row,col))
    self.dense_1 = tf.keras.layers.Dense(128,activation='relu',name="dense_1")
    self.dense_2 = tf.keras.layers.Dense(num_classes,activation='softmax',name="dense_2")
    self.drop = tf.keras.layers.Dropout(0.2)


  def call(self,inputs,training=None):
    """Run the forward pass.

    Args:
      inputs: Batch of samples; flattened before the dense layers.
      training: Optional flag forwarded to the dropout layer. When left as
        None, Keras decides the mode from the surrounding call context.

    Returns:
      The softmax output of `dense_2`.
    """
    x = self.flatten(inputs)
    x = self.dense_1(x)
    x = self.drop(x,training=training)
    x = self.dense_2(x)
    return x

  def copy(self):
    """Return a new FLModel with the same architecture and weight values.

    A subclassed model owns no variables until it has been called once, so
    the new model is run on a single all-zero sample to create them before
    the weights are transferred.

    Returns:
      An independent FLModel. If this model has not been built yet (it has
      no weights), the copy keeps its own fresh initialisation.
    """
    NewModel = FLModel(self.row,self.col,self.num_classes)
    # One dummy forward pass builds the layers and creates the variables.
    _ = NewModel(tf.zeros([1,self.row,self.col]))
    # get_weights()/set_weights() move every weight of the model, not only
    # the trainable ones, as independent NumPy copies.
    weights = self.get_weights()
    if weights:
      NewModel.set_weights(weights)
    return NewModel

  # def compute_output_shape(self,input_shape):
  #   shape = tf.TensorShape(input_shape).as_list()
  #   shape[-1] = self.num_classes
  #   return tf.TensorShape(shape)

# Client Initialization
模型训练和评估都要重写

In [None]:
class Client(object):
  """A simulated federated-learning node that trains and evaluates locally.

  Typical use: `ReceiveModel(...)` -> `NodeTrain()` -> `NodeEval()`.
  """

  def __init__(self,id,model=None,Train_dataset=None,Test_dataset=None):
    """Create a client.

    Args:
      id: Identifier of this client (stored as-is).
      model: Optional local model; overwritten by NodeTrain.
      Train_dataset: Pair `(x, y)` that NodeTrain unpacks.
      Test_dataset: Pair `(x, y)` that NodeEval unpacks.
    """
    self.id = id
    self.model = model
    self.Train_dataset = Train_dataset
    self.Test_dataset = Test_dataset
    # Filled in by ReceiveModel with the model sent by the parameter server.
    self.ParamModel = None
    self.num_epochs = 5
    self.batch_size = 32
    self.shuffle_buffer = 100
    self.prefetch_buffer = 10


  def ReceiveModel(self,ParamModel,num_epochs=1,batch_size=32,shuffle_buffer=100,prefetch_buffer=10):
    """Store the server's model together with the local training settings.

    Args:
      ParamModel: Model object to train locally. It is stored by reference,
        so pass a copy if the server's model must stay untouched.
      num_epochs: Local epochs used by NodeTrain and by preprocess.
      batch_size: Batch size used by NodeTrain and by preprocess.
      shuffle_buffer: Shuffle buffer size used by preprocess.
      prefetch_buffer: Prefetch buffer size used by preprocess.
    """
    self.ParamModel = ParamModel
    self.num_epochs = num_epochs
    self.batch_size = batch_size
    self.shuffle_buffer = shuffle_buffer
    self.prefetch_buffer = prefetch_buffer

  def preprocess(self,dataset):
    """Repeat, shuffle, batch, reformat and prefetch a dataset.

    Not used by NodeTrain/NodeEval. `dataset` must support the chained
    repeat/shuffle/batch/map/prefetch calls and yield elements with
    'pixels' and 'label' entries (presumably a tf.data.Dataset; confirm
    against the caller).

    Returns:
      The transformed dataset, whose elements are OrderedDicts with `x`
      reshaped to [-1, 784] and `y` reshaped to [-1, 1].
    """
    def batch_format_fn(element):
      return collections.OrderedDict(
            x = tf.reshape(element['pixels'],[-1,784]),
            y = tf.reshape(element['label'],[-1,1])
          )
    return dataset.repeat(self.num_epochs).shuffle(self.shuffle_buffer).batch(
        self.batch_size).map(batch_format_fn).prefetch(self.prefetch_buffer)

  def NodeTrain(self):
    """Compile and fit the received model on this client's training pair.

    The received model is trained in place and then stored as `self.model`.

    Returns:
      Whatever `fit` returns, so the caller can inspect the local training
      run.
    """
    model = self.ParamModel
    x,y = self.Train_dataset
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'])
    history = model.fit(x,y,batch_size=self.batch_size,epochs=self.num_epochs)
    self.model = model
    return history

  def NodeEval(self):
    """Evaluate the locally trained model on this client's test pair.

    Returns:
      Whatever `evaluate` returns, so the metrics are usable by the caller.
    """
    x,y = self.Test_dataset
    return self.model.evaluate(x,y)


# 阶段一：服务器完成模型初始化

In [None]:
# Shared model for 28x28 inputs with a 62-way softmax output
InitModel = FLModel(row=28,col=28,num_classes=62)

In [None]:
# Baseline: evaluate the untrained model on the MNIST test split.
# The server is created here because this cell runs before the one that
# pre-trains; without it a fresh "Run All" stops with NameError on `S`.
S = ParamServer('')
S.PreEval(InitModel,x_test,y_test)



In [None]:
# Create the parameter server (the init-model path is left empty)
S = ParamServer('')
# Pre-train for one epoch; PreTrain returns the same model object it was given
SendModel = S.PreTrain(InitModel,x_train,y_train,epochs=1)
# Evaluate the pre-trained model on the MNIST test split
S.PreEval(SendModel,x_test,y_test)



In [None]:
#x_test.shape

(10000, 28, 28)

# 阶段二：为Client分配数据（预处理工作）

In [None]:
# emnist_train,emnist_test = tff.simulation.datasets.emnist.load_data(only_digits=False)

Downloading data from https://storage.googleapis.com/tff-datasets-public/fed_emnist.tar.bz2


In [None]:
!pip install emnist

Collecting emnist
  Downloading https://files.pythonhosted.org/packages/d1/f4/78b24acbef9e8fe976dda700f16a3606f3b8363b015bc555f8050fbbd8ac/emnist-0.0-py3-none-any.whl
Installing collected packages: emnist
Successfully installed emnist-0.0


In [None]:
import emnist

In [None]:
# Load the EMNIST 'byclass' training samples; used below as a pair indexed
# with [0] (inputs) and [1] (labels).
# NOTE(review): the MNIST arrays above are scaled by 1/255 but no scaling is
# applied here — confirm both datasets use the same pixel range.
emnist_dataset = emnist.extract_training_samples('byclass')

In [None]:
#len(emnist_train.client_ids)

In [None]:
#emnist_train.element_type_structure

In [None]:
#example_dataset = emnist_train.create_tf_dataset_for_client(emnist_train.client_ids[0])

In [None]:
#example_element = next(iter(example_dataset))
#example_element['label'].numpy()

In [None]:
def create_data_for_clients(BasicSet,Num):
  """Draw `Num` random (sample, label) pairs, with replacement, from a dataset.

  Args:
    BasicSet: Pair-like object where `BasicSet[0]` holds the samples and
      `BasicSet[1]` the matching labels, both indexable by integer.
    Num: Number of pairs to draw.

  Returns:
    Tuple `(x, y)` of NumPy arrays, each with `Num` entries; `x[k]` and
    `y[k]` always come from the same position of the source dataset.

  Raises:
    ValueError: If `Num` > 0 and the dataset is empty (raised by
      `random.randint` on the empty range).
  """
  samples,labels = BasicSet[0],BasicSet[1]
  # Draw from the whole dataset. Bounding the index by `Num` would restrict
  # every client to the first `Num` rows.
  last_index = len(samples)-1
  picks = [random.randint(0,last_index) for _ in range(Num)]
  x_sequence = [samples[index] for index in picks]
  y_sequence = [labels[index] for index in picks]
  return (np.array(x_sequence),np.array(y_sequence))

In [None]:
# Number of simulated clients
NUM_CLIENTS = 10
# Samples drawn for each client's training set
NUM_TRAIN_SAMPLE_CLIENT = 640
# Samples drawn for each client's test set
NUM_TEST_SAMPLE_CLIENT = 256

In [None]:
# clients=[Client(i,Train_dataset=train,Test_dataset=test) for i in range(NUM_CLIENTS)]

In [None]:
# Container for the simulated Client objects
clients=[]

In [None]:
# Build the whole client pool in one assignment so that re-running this cell
# recreates the list instead of appending another NUM_CLIENTS entries to it.
clients = [
    Client(i,
           Train_dataset=create_data_for_clients(emnist_dataset,NUM_TRAIN_SAMPLE_CLIENT),
           Test_dataset=create_data_for_clients(emnist_dataset,NUM_TEST_SAMPLE_CLIENT))
    for i in range(NUM_CLIENTS)
]

In [None]:
# clients = [Client(i,Train_dataset=emnist_train.create_tf_dataset_for_client(emnist_train.client_ids[i]),
#                     Test_dataset=emnist_test.create_tf_dataset_for_client(emnist_test.client_ids[i])
#                     )
#                  for i in range(NUM_CLIENTS) ] #此处需要重写，用于确定数字占比

In [None]:
#clients[0].Train_dataset

In [None]:
#len(clients[0].Train_dataset),len(clients[0].Test_dataset)

# 阶段三：Clients 训练节点数据并聚合


## 节点训练
OK！fine,我这两天就是为了实现一个循环。。。
真他娘的丢人

In [None]:
# Every client receives its own copy of the pre-trained server model and
# trains it on its local data. `SendModel` is the model returned by
# S.PreTrain; the previous name `a` is not defined anywhere in the notebook.
for i in range(NUM_CLIENTS):
  clients[i].ReceiveModel(SendModel.copy())
  clients[i].NodeTrain()

NameError: ignored

In [None]:
# Evaluate each client's locally trained model on that client's test sample
for i in range(NUM_CLIENTS):
  print("Node {} is evaluating...".format(i))
  clients[i].NodeEval()
  ## although this step is not really useful

Node 0 is evaluating...
Node 1 is evaluating...
Node 2 is evaluating...
Node 3 is evaluating...
Node 4 is evaluating...
Node 5 is evaluating...
Node 6 is evaluating...
Node 7 is evaluating...
Node 8 is evaluating...
Node 9 is evaluating...


In [None]:
def DiffModel(model1,model2,layer_name,w_vs_b=0):
  """Check whether two models hold identical values in one layer weight array.

  Args:
    model1: First model; must provide `get_layer(name).get_weights()`.
    model2: Second model with the same interface.
    layer_name: Name of the layer to compare in both models.
    w_vs_b: Index into the list returned by `get_weights()`.

  Returns:
    True when the selected arrays are identical, False otherwise. On a
    mismatch "NOT SAME MODEL" is also printed once.
  """
  l1 = model1.get_layer(layer_name).get_weights()[w_vs_b].reshape([-1,])
  l2 = model2.get_layer(layer_name).get_weights()[w_vs_b].reshape([-1,])
  # One vectorised comparison replaces the element-by-element loop. Arrays of
  # different length count as different instead of raising IndexError or
  # silently ignoring the extra tail.
  same = bool(np.array_equal(l1,l2))
  if not same:
    print("NOT SAME MODEL")
  return same

In [None]:
# Compare the first weight array of layer 'dense_1' between the first two clients
DiffModel(clients[0].model,clients[1].model,'dense_1')

NOT SAME MODEL


## 模型聚合 FedAvg
THIS IS THE MOST IMPORTANT PART！

In [None]:
##先实现普通聚合，再考虑距离问题

In [None]:
# clients[0].Train_dataset[0].shape

(60000, 28, 28)

In [None]:
# a_1 = a.trainable_variables
# b_1 = clients[0].model.trainable_variables
# for _a,_b in zip(a_1,b_1):
#   # print(_a,_b,'\n')
#   print(type(_a))

<class 'tensorflow.python.eager.def_function.UnliftedInitializerVariable'>
<class 'tensorflow.python.eager.def_function.UnliftedInitializerVariable'>
<class 'tensorflow.python.eager.def_function.UnliftedInitializerVariable'>
<class 'tensorflow.python.eager.def_function.UnliftedInitializerVariable'>


In [None]:
# tmp = a_1[0]
# tmp2 = b_1[0]

In [None]:
# print(tmp.name)
# print(tmp2.name)

fl_model_57/dense_1/kernel:0
fl_model_60/dense_1/kernel:0


In [None]:
# tmp.numpy()

array([[-0.07970653,  0.07991762,  0.05910995, ...,  0.0579403 ,
         0.0321511 ,  0.02421231],
       [-0.05695383,  0.03625515, -0.06860252, ...,  0.02591816,
         0.04554088,  0.02055327],
       [-0.01783558, -0.04462572, -0.02057564, ...,  0.02187481,
        -0.04054561, -0.02853347],
       ...,
       [-0.05783828, -0.05880497,  0.07976013, ..., -0.03510847,
        -0.03695404,  0.03616227],
       [-0.06651882,  0.02818294, -0.02726433, ..., -0.05769905,
        -0.04426115, -0.00901749],
       [-0.01624558,  0.05673347, -0.07569796, ..., -0.05263541,
        -0.0753431 , -0.04903179]], dtype=float32)

In [None]:
# tmp.numpy().shape

(784, 128)

In [None]:
#  tmp2 =np.zeros(a.trainable_variables[0].numpy().shape,dtype='float64')

In [None]:
# tmp2.shape

(784, 128)

In [None]:
# Aggregation does not take per-client weighting into account yet
def FedAvg(NUM_CLIENTS,participants=None):
  """Average the trainable variables of the first `NUM_CLIENTS` client models.

  Every client contributes equally (unweighted mean).

  Args:
    NUM_CLIENTS: Number of clients, taken from the front of the client list,
      whose models are averaged.
    participants: Optional sequence of objects exposing a trained `.model`.
      Defaults to the notebook-level `clients` list.

  Returns:
    A new model, created with `.copy()` from the first client's model, whose
    trainable variables hold the element-wise mean of the client variables.
    The client models themselves are not modified.

  Raises:
    ValueError: If `NUM_CLIENTS` is smaller than 1.
  """
  if participants is None:
    participants = clients
  if NUM_CLIENTS < 1:
    raise ValueError("FedAvg needs at least one client model to average")
  node_models = [participants[node].model for node in range(NUM_CLIENTS)]
  # Any client model has the right architecture to serve as the template.
  iterModel = node_models[0].copy()
  all_weights = []
  for weights_index in range(len(iterModel.trainable_variables)):
    stacked = np.stack([m.trainable_variables[weights_index].numpy()
                        for m in node_models])
    # Accumulate in float64, divide by the number of clients (a plain sum
    # would scale the weights by NUM_CLIENTS), then cast back to the
    # variables' original dtype.
    all_weights.append(stacked.mean(axis=0,dtype='float64').astype(stacked.dtype))
  for iter_v,n in zip(iterModel.trainable_variables,all_weights):
    iter_v.assign(n)
  return iterModel

In [None]:
# Aggregate every client; use the shared constant rather than a literal so
# the call follows any change to the number of clients.
fedmodel = FedAvg(NUM_CLIENTS)

In [None]:
#昨天留的任务是如何把聚合后的numpy数组，转到模型中

In [None]:
# Evaluate the aggregated model on the MNIST test split
S.PreEval(fedmodel,x_test,y_test)



In [None]:
# NOTE(review): `fed` is not defined in any visible cell — presumably a list of
# aggregated weight arrays from an earlier session; confirm or remove.
len(fed)

4

## 基本框架已经完成

In [None]:
# NOTE(review): `a` is not defined in any visible cell — presumably the
# pre-trained server model; confirm.
iter2 = a.copy()

In [None]:
# Keep a handle on the copied model's trainable variables
iter2_vars = iter2.trainable_variables

In [None]:
# Display the variables
iter2_vars

In [None]:
# Overwrite each variable of iter2 with the matching entry of `fed`
# NOTE(review): `fed` is not defined in any visible cell — confirm its origin.
for _v1,_v2 in zip(iter2_vars,fed):
  _v1.assign(_v2)

In [None]:
# Check that iter2 now differs from `a` in layer 'dense_1'
# NOTE(review): `a` is not defined in any visible cell — confirm.
DiffModel(iter2,a,'dense_1')

NOT SAME MODEL


In [None]:
# Alias (not a copy) of the first client's trained model
b = clients[0].model

In [None]:
# Grab the trainable variables of both models for comparison
# NOTE(review): `a` is not defined in any visible cell — confirm.
v1 = a.trainable_variables
v2 = b.trainable_variables

In [None]:
# Probe: a freshly constructed FLModel that has never been called; print its
# trainable variables to see what it holds before any forward pass.
# NOTE(review): `a` is not defined in any visible cell — confirm.
b = FLModel(28,28,62)
v1 = a.trainable_variables
# S.PreTrain(b,x_train,y_train)
v2 = b.trainable_variables
print(b.trainable_variables)
# for v_1,v_2 in zip(v1,v2):
#   print("??")
  # v_1.assign(v_2.numpy())

[]


In [None]:
# Slice-copy v1; this rebinds the name v2 only and does not touch model `b`
v2=v1[:]

In [None]:
# Compare the identity of v2 with that of b's own variable collection
id(v2),id(b.trainable_variables)

(140499046190288, 140499104477984)

In [None]:
# Identity of a's variable collection, for comparison with the ids above
# NOTE(review): `a` is not defined in any visible cell — confirm.
id(a.trainable_variables)

140499217825136

In [None]:
# Throwaway Sequential model with one Dense(32) layer on 784-dimensional
# input, used to look at its variables in the next cell.
tmpmodel = keras.models.Sequential([
                                   keras.layers.Dense(32,input_shape=(784,),activation='relu'),
])

In [None]:
# Display the model's trainable variables
tmpmodel.trainable_variables

[<tf.Variable 'dense_1/kernel:0' shape=(784, 32) dtype=float32, numpy=
 array([[ 0.07944056, -0.01687008,  0.06158806, ...,  0.02253681,
         -0.00866669,  0.05025689],
        [-0.07876401,  0.06727929, -0.04862248, ..., -0.0574102 ,
          0.0471491 ,  0.00323779],
        [ 0.02786615,  0.0126921 , -0.03494832, ...,  0.04175083,
         -0.0363945 ,  0.05179618],
        ...,
        [ 0.06009207, -0.02396834, -0.06391142, ...,  0.04591579,
          0.00446624,  0.06287944],
        [ 0.03360625,  0.03261185,  0.08488534, ...,  0.04576839,
          0.02080478, -0.05224812],
        [-0.06869756,  0.05392497, -0.01011685, ...,  0.04216174,
         -0.04211827,  0.07964728]], dtype=float32)>,
 <tf.Variable 'dense_1/bias:0' shape=(32,) dtype=float32, numpy=
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       dtype=float32)>]

In [None]:
# FLModel for 28x28 inputs using the default num_classes (62)
z = FLModel(28,28)