Mandatory imports

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import random
import tensorflow as tf
import time
from keras import backend as k
from keras import metrics
from tensorflow.keras import activations
from tensorflow.keras.layers import Input, Layer
from tensorflow.keras.optimizers import Adam, Adagrad
from tensorflow.keras.datasets import mnist

2023-04-28 11:08:41.473377: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-04-28 11:08:41.473422: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


Load Dataset ( https://github.com/mgermain/MADE/releases/download/ICML2015/binarized_mnist.npz )

In [3]:
# Example for loading the data
#!wget https://github.com/mgermain/MADE/releases/download/ICML2015/binarized_mnist.npz 
# I stored the dataset in my drive
#def load_from_drive():
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
x_temp = X_train
x = x_temp.reshape(x_temp.shape[0],x_temp.shape[1]*x_temp.shape[2]) #flatten 28x28 pixels to one dimension (784 inputs)
x = np.where(x > 127, 1, 0) #binarize x
y = Y_train
seed= 77
X_test = X_test.reshape(X_test.shape[0],X_test.shape[1]*X_test.shape[2]) #flatten 28x28 pixels to one dimension (784 inputs)
X_test = np.where(X_test > 127, 1, 0) 

#load_from_drive()

Create Mask Generator Module for creating/managing MADEs masks

In [2]:
class MaskGenerator(object):
  # num_masks: The amount of masks that will be cycled through during training. if num_masks == 1 then connectivity agnostic training is disabled
  # units_per_layer = Array containing # of units per layer
  # seed = The seed used for randomly sampling the masks, for guaranteeing reproducability
  # natural_input_order = Boolean defining if the natural input order (x1, x2, x3 etc) should be used
  # current_mask: Integer to keep track of the mask currently used (xth mask)
  # m: The mask values assigned to the networks units. 0 is the index of the input layer, 1 is the index of the first hidden layer and so on
  def __init__(self, num_masks, units_per_layer, natural_input_order = False, seed=42):
    self.num_masks = num_masks
    self.units_per_layer = units_per_layer
    self.seed = seed
    self.natural_input_order = natural_input_order
    self.current_mask = 0
    self.m = {}

    if natural_input_order: # init input ordering according to settings
      self.m[0] = np.arange(self.units_per_layer[0])
    else:
      self.shuffle_inputs(return_mask = False)
  
  #Iterate through the hidden layers, resample new connectivity values m and build/return the resulting new masks
  def shuffle_masks(self):
    layer_amount = len(self.units_per_layer)
    rng = np.random.RandomState(self.seed+self.current_mask)
    self.current_mask = (self.current_mask + 1) % self.num_masks # Cycle through masks
    for i in range(1, layer_amount -1): #skip input layer & output layer and only iterate through hidden_layers
      self.m[i] = rng.randint(self.m[i-1].min(), self.units_per_layer[0] -1, size = self.units_per_layer[i]) # sample m from [min_m(previous_layer, d-1)] for all hidden units
    new_masks = [tf.convert_to_tensor((self.m[l-1][:, None] <= self.m[l][None,:]), dtype=np.float32) for l in range(1, layer_amount-1)] # build hidden layer masks
    new_masks.append(tf.convert_to_tensor((self.m[layer_amount-2][:, None] < self.m[0][None, :]), dtype = np.float32)) #build output layer mask. Note that the m values for the output layer are the same as for the input layer 
    return new_masks

  # builds & returns direct mask. Call this method after shuffling inputs if order_agnostic training is active.
  # Note that the Mask values m are the same for both input and output layers
  def get_direct_mask(self):
    return tf.convert_to_tensor((self.m[0][:, None] < self.m[0][None, :]), dtype = np.float32)

  # shuffle input ordering and return new mask for first hidden layer
  def shuffle_inputs(self, return_mask = True):
    self.m[0] = np.random.permutation(self.units_per_layer[0])
    if return_mask:
      return tf.convert_to_tensor((self.m[0][:, None] <= self.m[1][None,:]), dtype=np.float32)
    return

Custom Layer for MADE masking

In [3]:
# should be self explaining
class MaskedLayer(Layer):
    def __init__(self,
                units,
                mask,
                activation='relu',
                kernel_initializer='glorot_uniform',
                bias_initializer='zeros',
                **kwargs):
      self.units = units
      self.mask = mask
      self.activation = activations.get(activation)
      self.kernel_initializer = kernel_initializer
      self.bias_initializer = bias_initializer
      super(MaskedLayer, self).__init__(**kwargs)

    def build(self, input_shape):
      #self.input_dim = input_shape[-1] if self.x_dim is None else input_shape[0][-1]

      self.W = self.add_weight(shape=self.mask.shape,
                                  initializer=self.kernel_initializer,
                                  name='W')

      self.bias = self.add_weight(shape=(self.units,),
                                      initializer=self.bias_initializer,
                                      name='bias')

      self.built = True

    def call(self, inputs):
        ## Modified keras.Dense to account for the mask
        masked_weights = self.W*self.mask
        output = k.dot(inputs, masked_weights)
        output = k.bias_add(output, self.bias, data_format = 'channels_last')
        if self.activation is not None:
            output = self.activation(output)
        return output

    def set_mask(self, mask):
        self.mask = mask

    def get_mask(self):
        return self.mask

    def compute_output_shape(self, input_shape):
        ##Same as keras.Dense
        assert input_shape and len(input_shape) >= 2
        assert input_shape[-1]
        output_shape = list(input_shape)
        output_shape[-1] = self.units
        return tuple(output_shape)



class ConditionningMaskedLayer(MaskedLayer):
    def __init__(self, 
                units,
                mask,
                activation='relu',
                kernel_initializer='glorot_uniform',
                bias_initializer='zeros',
                use_cond_mask=False,
                **kwargs):
        self.use_cond_mask = use_cond_mask
        super(ConditionningMaskedLayer, self).__init__(units,
                mask,
                activation,
                kernel_initializer,
                bias_initializer, **kwargs)

    def build(self, input_shape):
        if self.use_cond_mask:
            self.U = self.add_weight(shape=self.mask.shape,
                                     initializer=self.kernel_initializer,
                                     name='U')
        super().build(input_shape)

    def call(self, inputs):
        if self.use_cond_mask == False:
          return super().call(inputs)
        masked_w_weights = self.W*self.mask
        masked_u_weights_times_one_vec = k.dot(tf.ones(tf.shape(inputs)),self.U*self.mask)
        weighted_input = k.dot(inputs, masked_w_weights)
        weighted_input_and_bias = k.bias_add(weighted_input, self.bias, data_format = 'channels_last')
        output = weighted_input_and_bias + masked_u_weights_times_one_vec
        if self.activation is not None:
            output = self.activation(output)
        return output



class DirectInputConnectConditionningMaskedLayer(ConditionningMaskedLayer):
      def __init__(self,
                   units,
                   mask,
                   activation='relu',
                   kernel_initializer='glorot_uniform',
                   bias_initializer='zeros',
                   use_cond_mask=False,
                   direct_mask = None,
                **kwargs):
        self.direct_mask = direct_mask
        super(DirectInputConnectConditionningMaskedLayer, self).__init__(units,
                mask,
                activation,
                kernel_initializer,
                bias_initializer,
                use_cond_mask,
                **kwargs)

      def build(self, input_shape):
        if self.direct_mask is not None:
          self.D = self.add_weight(shape=self.direct_mask.shape,
                                  initializer=self.kernel_initializer,
                                  name='D')
        super().build(input_shape)

      def set_mask(self, mask, direct = False):
        if direct:
          self.direct_mask = mask
        else:
          super().set_mask(mask)

      def get_mask(self, direct = False):
        if direct:
          return self.direct_mask
        else:
          return super().get_mask

      def call(self, inputs):
        if self.direct_mask is None:
          return super().call(inputs)
        input, direct_input = inputs[0], inputs[1]

        masked_w_weights = self.W*self.mask
        weighted_input = k.dot(input, masked_w_weights)
        weighted_input_and_bias = k.bias_add(weighted_input, self.bias, data_format = 'channels_last')
        weighted_direct_input = k.dot(direct_input, self.D * self.direct_mask)

        if self.use_cond_mask:
          masked_u_weights_times_one_vec = k.dot(tf.ones(tf.shape(input)),self.U*self.mask)
          output = weighted_direct_input + weighted_input_and_bias + masked_u_weights_times_one_vec

        else: output = weighted_direct_input + weighted_input_and_bias

        if self.activation is not None:
            output = self.activation(output)
        return output

# MADE Model

In [4]:
# outputs: output Layer   ---------- Both needed when using ----------
# inputs: input Layer     ----------    base keras.Model    ----------     
# mask_generator: Mask Generator instance that manages the Models Masks
# order_agn: Boolean defining if training should be order_agnostic
# conn_agn: Boolean defining if training should be connectivity_agnostic
# direct_input: Boolean defining if direct input masks should be used
class ModelMADE(tf.keras.Model):
    def __init__(self, inputs, outputs, mask_generator, order_agn, conn_agn,
                 direct_input, **kwargs):
      super(ModelMADE, self).__init__(inputs = inputs, outputs = outputs, **kwargs)
      self.mask_generator = mask_generator
      self.order_agn = order_agn
      self.conn_agn = conn_agn
      self.direct_input = direct_input
    
    # Method called by fit for every batch
    def train_step(self, data):

      # reoder inputs, change masks
      if self.order_agn:
        # order agnostic and connectivity agnostic training
        if self.conn_agn:
          self.mask_generator.shuffle_inputs(return_mask = False)
          new_masks = self.mask_generator.shuffle_masks()
          for hidden_layer_id in range(len(new_masks)):
            self.layers[1+hidden_layer_id].set_mask(new_masks[hidden_layer_id]) #assign layer+1 since the first layer is no hidden layer and has no mask
        
        # order agnostic but not connectivity agnostic training        
        else:
          self.layers[1].set_mask(self.mask_generator.shuffle_inputs())
        if self.direct_input:
          self.layers[-1].set_mask(self.mask_generator.get_direct_mask(), direct=True)

      # not order agnostic but connectivity agnostic training
      elif self.conn_agn:
        new_masks = self.mask_generator.shuffle_masks()
        for hidden_layer_id in range(len(new_masks)):
          self.layers[1+hidden_layer_id].set_mask(new_masks[hidden_layer_id])


      # Unpack the data. Its structure depends on your model and
      # on what you pass to `fit()`.
      x, y = data

      with tf.GradientTape() as tape:
        y_pred = self(x, training=True)  # Forward pass
        # Compute the loss value
        # (the loss function is configured in `compile()`)
        loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)


      # Compute gradients
      trainable_vars = self.trainable_variables
      gradients = tape.gradient(loss, trainable_vars)
      # Update weights
      self.optimizer.apply_gradients(zip(gradients, trainable_vars))
      # Update metrics (includes the metric that tracks the loss)
      self.compiled_metrics.update_state(y, y_pred)
      # Return a dict mapping metric names to current value
      return {m.name: m.result() for m in self.metrics}

# MADE Object
responsible for building and inintalizing the MADE model

In [5]:
# units_per_layer = Array containing # of units per layer
# natural_input_order = Boolean defining if the natural input order (x1, x2, x3 etc) should be used
# num_masks: The amount of masks that will be cycled through during training. if num_masks == 1 then connectivity agnostic training is disabled
# order_agn: Boolean defining if training should be order_agnostic
# connectivity_weights: Boolean defining if connectivity weights should be used
# direct input: Boolean defining if there should be a direct input connection between input & output layer
  # seed = The seed used for randomly sampling the masks, for guaranteeing reproducability
class MADE(object):
  def __init__(self, units_per_layer, natural_input_order, num_masks, order_agn,
               connectivity_weights, direct_input, seed = "42"):
    self.units_per_layer = units_per_layer
    self.natural_input_order = natural_input_order
    self.num_masks = num_masks
    self.order_agn = order_agn
    self.connectivity_weights = connectivity_weights
    self.direct_input = direct_input
    self.seed = seed
    self.mask_generator = MaskGenerator(num_masks, units_per_layer, natural_input_order, seed)

  def build_model(self):
    # build input layer
    a = Input(shape = (self.units_per_layer[0],))
    x_layers = []
      
    #build masks
    masks = self.mask_generator.shuffle_masks()
    direct_mask = None

    #build hidden layers  
    for i in range(1,len(self.units_per_layer)-1): #exclude input & output layer
      if i == 1:
        x_layers.append(ConditionningMaskedLayer(units = self.units_per_layer[i], mask = masks[i-1], use_cond_mask = self.connectivity_weights)(a)) #activation is relu, call custom_masking with previous layer as input-param
      else:
        x_layers.append(ConditionningMaskedLayer(units = self.units_per_layer[i], mask = masks[i-1], use_cond_mask = self.connectivity_weights)(x_layers[i-1]))
          
    #build output layer, output layer's activation is sigmoid.
    if self.direct_input:
      direct_mask = self.mask_generator.get_direct_mask()
      output_layer = DirectInputConnectConditionningMaskedLayer(units = self.units_per_layer[-1], mask = masks[-1], activation='sigmoid', use_cond_mask = self.connectivity_weights, direct_mask = direct_mask)([x_layers[-1], a])
    else:
      output_layer = ConditionningMaskedLayer(units = self.units_per_layer[-1], mask = masks[-1], activation='sigmoid', use_cond_mask = self.connectivity_weights)(x_layers[-1])
    x_layers.append(output_layer)
    
    self.model = ModelMADE(inputs = a, outputs = x_layers[-1], mask_generator = self.mask_generator, order_agn = self.order_agn, conn_agn = self.num_masks>1,
                           direct_input=self.direct_input)
    return self.model

  def summary(self):
    return self.model.summary()

# Loss Function

In [6]:
def cross_entropy_loss(x, x_decoded_mean):
    x = k.flatten(x)
    x_decoded_mean = k.flatten(x_decoded_mean)
    #print("loss", len(X_train[1]))
    xent_loss = 150 * metrics.binary_crossentropy(x, x_decoded_mean)
    return xent_loss

In [7]:
def average_model(client_weights):
  av_param= []
  avg= 1/len(client_weights)
  #avg= 0.1
  print(avg)
  for i in range(len(client_weights[0])):
    av_param.append([])
  for i in range(len(client_weights)):
    for j in range(len(client_weights[i])):
      if i ==0:
        av_param[j].append(avg * client_weights[i][j])
      else:
        av_param[j] = av_param[j] + (avg * client_weights[i][j])
  for i in range(len(client_weights[0])):
    av_param[i]=np.squeeze(av_param[i])
  return av_param

# Build & Run Model

In [None]:


######################### Settings #########################
_optimizer_type = "ada" #for any other string here then adam Adagrad is used
_adam_lr = 0.001 #0.1, 0.05, 0.01, 0.005
_ada_lr = 0.001 #0.1, 0.05, 0.01, 0.005
_ada_epsilon = 1e-6

_hidden_layers = [500]
_natural_input_order = False
_num_masks = 1
_order_agn = True
_order_agn_step_size = 1
_conn_agn_step_size = 1
_connectivity_weights = False
_direct_input = True
_seed = 42
_batch_size = 100
_epochs = 1

if _optimizer_type == "adam": 
  optimizer = Adam(_adam_lr)
else: 
  optimizer = Adagrad(_ada_lr, epsilon = _ada_epsilon)

tf.keras.backend.clear_session()

units_per_layer = np.concatenate(([784], _hidden_layers, [784])) #in MADE case the input & output layer have the same amount of units
print("shape",units_per_layer)
seeds= [42]

print(seeds)
data_split= ['sull']#['full', 12000, 6000, 3000, 1500]
clients= [3] #[3,5,10,20,40]
for i in range(len(data_split)):
  print(f" START TRAINING DATA SPLIT {data_split[i]}")
  seed = seeds[i]
  num_clients= clients[i]

    


  for _seed in seeds:
    temp = MADE(units_per_layer, natural_input_order=_natural_input_order, num_masks = _num_masks, order_agn = _order_agn, 
                connectivity_weights = _connectivity_weights, direct_input = _direct_input, seed = _seed)
    model = temp.build_model()
    model.compile(optimizer=optimizer, loss=cross_entropy_loss, run_eagerly=True)

    start = time.time()
    print("enter")
    #num_clients = 2
    num_rounds= 50
    num_tasks= 1

    loss = {}
    for c in range(num_clients):
      loss[f'{c}']= []

    val_loss = {}
    for c in range(num_clients):
      val_loss[f'{c}']= []
    for t in range(num_tasks):
      for r in range(num_rounds):   
        client_weights= []
        for c in range(num_clients):
          X= np.load(f'/home/subarna/Pictures/LARS/FedWeIT-MADE/content/Task_{t*num_clients+ c}.npy')
          X_val= np.load(f'/home/subarna/Pictures/LARS/FedWeIT-MADE/content/Val_{t*num_clients+ c}.npy')

          #X=  tf.convert_to_tensor(data['x_train'])
          history = model.fit(
              X, X,
              batch_size=_batch_size,
              epochs=_epochs,
              validation_data=(X_val, X_val)
          )
          
          #print(history.history.keys())
          #val_loss[f'{c}'].append(history.history['val_loss'])
          loss[f'{c}'].append(history.history['loss'])
          temp= history.history['loss']
          el= model.get_weights()
          #print(el)
          if t > 0:
            for pt in range(t):
              val_data= np.load(f'/home/subarna/Pictures/LARS/FedWeIT-MADE/output/binary/binary_{(pt)*num_clients+ c}_valid.npy')
              val= val_data['x_valid']
              test_loss=model.evaluate(val, val, batch_size=_batch_size)
              f = open("/home/subarna/Pictures/LARS/FedWeIT-MADE/federated_offline.txt", "a")
              f.write('\n'+f'forgetting loss of task  {pt} for client {c} at current task {t}: {test_loss}' )
              f.close()

            

          client_weights.append(el)
          f = open("/home/subarna/Pictures/LARS/FedWeIT-MADE/federated_offline.txt", "a")
          f.write('\n'+
                  f'[task {t}]'+
                  f'[round {r}]'+
                  f'[client {c}] '+
                  f'[loss:{temp}]')
          f.close()
        model_param= average_model(client_weights)
        model.set_weights(model_param)

    done = time.time()
    elapsed = done - start
    print("Elapsed: ", elapsed)
    print(f"Number of masks: {_num_masks}")
    test_loss=model.evaluate(X_test, X_test, batch_size=_batch_size)
    #t_loss.append(test_loss)
    print(f"Test Loss: {test_loss}")

    #np.save(f'content/VAL_Loss_CM_{_seed}', val_loss)
    #np.save(f'/app/src/FedWit/output/Loss_CM_{_seed}_{data_split[i]}', loss)
  #np.save(f'content/Test_loss_{data_split[i]}', test_loss)
  #weights= model.get_weights()
  #np.save(f'content/modelweights_{data_split[i]}', weights)
  #print(_seed)

#model.save('/content/mymodel')
#model= keras.models.load_model('/content/mymodel')



In [19]:
######################### Settings #########################
_optimizer_type = "ada" #for any other string here then adam Adagrad is used
_adam_lr = 0.001 #0.1, 0.05, 0.01, 0.005
_ada_lr = 0.005 #0.1, 0.05, 0.01, 0.005
_ada_epsilon = 1e-6

_hidden_layers = [500]
_natural_input_order = False,
_num_masks = 1
_order_agn = True
_order_agn_step_size = 1
_conn_agn_step_size = 1
_connectivity_weights = False
_direct_input = False
_seed = 42
_batch_size = 100
_epochs = 50

if _optimizer_type == "adam": 
  optimizer = Adam(_adam_lr)
else: 
  optimizer = Adagrad(_ada_lr, epsilon = _ada_epsilon)

tf.keras.backend.clear_session()
#print("build",X_train.shape)
units_per_layer = np.concatenate(([150], _hidden_layers, [150])) #in MADE case the input & output layer have the same amount of units

temp = MADE(units_per_layer, natural_input_order=_natural_input_order, num_masks = _num_masks, order_agn = _order_agn, 
            connectivity_weights = _connectivity_weights, direct_input = _direct_input, seed = _seed)
model = temp.build_model()
model.compile(optimizer=optimizer, loss=cross_entropy_loss, run_eagerly=True)
model.summary()

start = time.time()
data= np.load(f'output/binary/binary_0_train.npy', allow_pickle=True).item()
X=  tf.convert_to_tensor(data['x_train'])
val_data= np.load(f'output/binary/binary_0_valid.npy', allow_pickle=True).item()
X_valid= val_data['x_valid']
#test_data= np.load(f'output/binary/binary_0_test.npy', allow_pickle=True).item()
#X_test= test_data['x_test']
plt.history = model.fit(
              X, X,
              batch_size=_batch_size,
              epochs=_epochs,
              validation_data=(X_valid, X_valid)
                       ) 
done = time.time()
elapsed = done - start
print("Elapsed: ", elapsed)
print(f"Number of masks: {_num_masks}")
test_loss=model.evaluate(X_test, X_test, batch_size=_batch_size)
print(f"Test Loss: {test_loss}")

Model: "model_made"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 150)]             0         
                                                                 
 conditionning_masked_layer   (None, 500)              75500     
 (ConditionningMaskedLayer)                                      
                                                                 
 conditionning_masked_layer_  (None, 150)              75150     
 1 (ConditionningMaskedLayer                                     
 )                                                               
                                                                 
Total params: 150,650
Trainable params: 150,650
Non-trainable params: 0
_________________________________________________________________


2023-04-28 11:35:33.376768: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2023-04-28 11:35:33.376792: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2023-04-28 11:35:33.376810: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (subarna-ThinkPad-P14s-Gen-2a): /proc/driver/nvidia/version does not exist
2023-04-28 11:35:33.377018: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Elapsed:  853.6430950164795
Number of masks: 1


NameError: name 'X_test' is not defined

In [16]:
model.save('results/mod')

INFO:tensorflow:Assets written to: results/mod/assets


In [24]:
from emnist import extract_training_samples
import numpy as np
images, label = extract_training_samples('letters')
images= np.where(images> 127, 1,0)
x_emnist = images.reshape(images.shape[0],images.shape[1]*images.shape[2])
num= [1,2,3,4,5,6,7,8,9,10]

In [25]:
idx= np.concatenate([np.where(label[:] == c)[0] for c in num], axis= 0)
len(idx)

48000

In [26]:
data= images[idx]

In [27]:
data.shape

(48000, 28, 28)