In [1]:
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
tfd = tfp.distributions

In [2]:
# Display the installed TensorFlow version so the environment this notebook
# was run in is recorded alongside the results (the saved output shows 2.1.0).
tf.__version__

'2.1.0'

In [3]:
# Define Bayes Model
def bayes_model_arch(voxel_dim=64, deviation_channels=3, output_dimension=6,
                     kl_scale=4000, aleatoric_std=0.001, learning_rate=0.001):
    """Build and compile the Bayesian 3D-CNN regression model.

    The network stacks three ``Convolution3DFlipout`` layers, a 3D max-pool,
    a flatten and three ``DenseFlipout`` layers, and ends in a
    ``DistributionLambda`` that wraps the last dense output as the mean of a
    ``MultivariateNormalDiag`` with a fixed diagonal scale.

    Args:
        voxel_dim: Edge length of the cubic voxel input.
        deviation_channels: Number of channels per voxel.
        output_dimension: Number of regression targets (size of the
            distribution's mean vector).
        kl_scale: Value every layer's KL divergence is divided by.
            NOTE(review): presumably the number of training samples --
            confirm that 4000 matches the dataset used for training.
        aleatoric_std: Fixed standard deviation used for every output
            dimension of the predictive distribution.
        learning_rate: Learning rate of the Adam optimizer.

    Returns:
        The compiled ``tf.keras.Sequential`` model. Its loss is the negative
        log-likelihood of the targets under the predicted distribution and
        its metric is mean absolute error.
    """

    def kl_divergence_function(q, p, _):
        # Scale the KL term down so it does not dominate the likelihood term.
        return tfd.kl_divergence(q, p) / tf.cast(kl_scale, dtype=tf.float32)

    def negloglik(y, rv_y):
        # `rv_y` is the distribution produced by the final DistributionLambda.
        return -rv_y.log_prob(y)

    aleatoric_tensor = [aleatoric_std] * output_dimension

    model = tf.keras.Sequential([
        tf.keras.layers.InputLayer(input_shape=(voxel_dim, voxel_dim, voxel_dim, deviation_channels)),
        tfp.layers.Convolution3DFlipout(32, kernel_size=(5, 5, 5), kernel_divergence_fn=kl_divergence_function, strides=(2, 2, 2), activation=tf.nn.relu),
        tfp.layers.Convolution3DFlipout(32, kernel_size=(4, 4, 4), kernel_divergence_fn=kl_divergence_function, strides=(2, 2, 2), activation=tf.nn.relu),
        tfp.layers.Convolution3DFlipout(32, kernel_size=(3, 3, 3), kernel_divergence_fn=kl_divergence_function, strides=(1, 1, 1), activation=tf.nn.relu),
        tf.keras.layers.MaxPooling3D(pool_size=[2, 2, 2]),
        tf.keras.layers.Flatten(),
        tfp.layers.DenseFlipout(128, activation=tf.nn.relu, kernel_divergence_fn=kl_divergence_function),
        tfp.layers.DenseFlipout(64, kernel_divergence_fn=kl_divergence_function, activation=tf.nn.relu),
        tfp.layers.DenseFlipout(output_dimension, kernel_divergence_fn=kl_divergence_function),
        tfp.layers.DistributionLambda(
            lambda t: tfd.MultivariateNormalDiag(loc=t[..., :output_dimension], scale_diag=aleatoric_tensor)),
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        # Kept from the original: the loss receives a distribution, not a tensor.
        experimental_run_tf_function=False,
        loss=negloglik,
        metrics=[tf.keras.metrics.MeanAbsoluteError()],
    )
    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print() (that would emit a trailing "None").
    model.summary()
    return model

model=bayes_model_arch()

Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Do not pass `graph_parents`.  They will  no longer be used.
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv3d_flipout (Conv3DFlipou (None, 30, 30, 30, 32)    24032     
_________________________________________________________________
conv3d_flipout_1 (Conv3DFlip (None, 14, 14, 14, 32)    131104    
_________________________________________________________________
conv3d_flipout_2 (Conv3DFlip (None, 12, 12, 12, 32)    55328     
_________________________________________________________________
max_pooling3d (MaxPooling3D) (None, 6, 6, 6, 32)       0         
_________________________________________________________________
flatten (Flatten)            (None, 6912)              0         
_________________________________________________________________
dense_flipout (DenseFlipout)

In [4]:
#Loading Model Weights
# Restore previously trained weights into the freshly built `model`.
# The checkpoint prefix resolves to
# '../trained_models/inner_rf_assembly/model/Bayes_trained_model_0',
# relative to the notebook's working directory.
part_type="inner_rf_assembly"
train_path='../trained_models/'+part_type
weight_path=train_path+'/model'+'/Bayes_trained_model_0'
# Left as the cell's last expression, so the returned load-status object is
# displayed (a CheckpointLoadStatus in the saved output).
model.load_weights(weight_path)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x1e793e68048>

In [22]:
# Template for extracting weights: [0] kernel posterior mean, [1] kernel posterior
# (untransformed) scale, [2] bias (deterministic)
weight_mean_val=[]
weight_std_val=[]

#Get Weights for each layer
for layer in model.layers:
    # Fetch once per layer; get_weights() copies every array on each call.
    layer_weights = layer.get_weights()
    if len(layer_weights) < 2:
        # Pooling / flatten / distribution layers carry no kernel posterior.
        print("No weights for the following layer: ",layer.name)
        continue

    kernel_mean = layer_weights[0]
    kernel_raw_scale = layer_weights[1]
    # With TFP's default mean-field posterior (no kernel_posterior_fn is passed
    # when the layers are built) the stored variable is the *untransformed*
    # scale; the actual standard deviation is eps + softplus(raw). The raw value
    # can be negative, so taking its reciprocal directly would give negative
    # rates. logaddexp(0, x) is a numerically stable softplus.
    # NOTE(review): confirm against the installed TFP version.
    kernel_std = np.logaddexp(0.0, kernel_raw_scale) + np.finfo(kernel_raw_scale.dtype).eps

    # Append both together so the two lists always stay aligned.
    weight_mean_val.append(kernel_mean)
    weight_std_val.append(kernel_std)
    print(layer.name,kernel_mean.shape)

print(len(weight_std_val))

print("Computing Weight Importance...")

# Importance is taken as base_lr / std, i.e. low-uncertainty weights get a
# large value.
# NOTE(review): this list is later used directly as a per-parameter learning
# rate, which makes *certain* weights move fastest. If important weights are
# meant to be protected, the rate should scale with std instead -- confirm.
weight_importance=[]
base_lr=0.001
for weight_std in weight_std_val:
    inverse_std = np.reciprocal(weight_std)
    weight_importance.append(base_lr*inverse_std)
    print(inverse_std.shape)

# One entry per weighted layer, each with the shape of that layer's kernel.
param_wise_lr=weight_importance

conv3d_flipout (5, 5, 5, 3, 32)
conv3d_flipout_1 (4, 4, 4, 32, 32)
conv3d_flipout_2 (3, 3, 3, 32, 32)
No weights for the following layer:  max_pooling3d
No weights for the following layer:  flatten
dense_flipout (6912, 128)
dense_flipout_1 (128, 64)
dense_flipout_2 (64, 6)
No weights for the following layer:  distribution_lambda
6
Computing Weight Importance...
(5, 5, 5, 3, 32)
(4, 4, 4, 32, 32)
(3, 3, 3, 32, 32)
(6912, 128)
(128, 64)
(64, 6)


In [26]:
# Count the variables held by the model (the saved output shows 18).
# NOTE(review): the previous comment here described data import, which this
# cell does not do.
len(model.variables)

18

In [52]:
# Obtaining Gradient Structure for model
# Toy example: differentiate the norm of an input-gradient with respect to the
# weights of a small dense network, to inspect the structure of the result
# (a list with one gradient per trainable variable).
x = tf.random.normal([7, 5])
test_model = tf.keras.Sequential([
    tf.keras.layers.Dense(10, activation=tf.nn.relu),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10, activation=tf.nn.relu),
])
#layer = tf.keras.layers.Dense(10, activation=tf.nn.relu)

with tf.GradientTape() as t2:
  # The inner tape only takes the gradient with respect to the input,
  # not the variables.
  with tf.GradientTape(watch_accessed_variables=False) as t1:
    t1.watch(x)
    # Run the forward pass once and reuse it; the original evaluated the model
    # a second time inside the tape and left `y` unused.
    y = test_model(x)
    out = tf.reduce_sum(y**2)
  # 1. Calculate the input gradient.
  g1 = t1.gradient(out, x)
  # 2. Calculate the magnitude of the input gradient.
  g1_mag = tf.norm(g1)

# 3. Calculate the gradient of the magnitude with respect to the model.
dg1_mag = t2.gradient(g1_mag, test_model.trainable_variables)
test_model.trainable_variables

[<tf.Variable 'sequential_3/dense_7/kernel:0' shape=(5, 10) dtype=float32, numpy=
 array([[ 0.6258982 , -0.4979097 ,  0.28686547, -0.01360267,  0.05580539,
         -0.56882447, -0.6072071 , -0.55717945,  0.5352369 ,  0.11377299],
        [-0.16711238,  0.30352652,  0.14272827,  0.20083952,  0.51501626,
          0.15367812,  0.60684675, -0.42796087, -0.6226447 , -0.39787495],
        [ 0.41024953, -0.53364325,  0.00331223, -0.07450342,  0.03721464,
          0.4702887 ,  0.433626  ,  0.369016  ,  0.03784174, -0.07247078],
        [ 0.37795883, -0.2091943 , -0.19328001, -0.20671052, -0.604131  ,
          0.5799908 ,  0.29559696,  0.01463783, -0.62099135, -0.49084884],
        [-0.01541686,  0.6160254 ,  0.6006045 ,  0.5407329 ,  0.4209569 ,
         -0.44127223,  0.25320876,  0.5104522 , -0.28755203,  0.6040004 ]],
       dtype=float32)>,
 <tf.Variable 'sequential_3/dense_7/bias:0' shape=(10,) dtype=float32, numpy=array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)>,
 <tf.V

In [53]:
#Dummy Gradient Application
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

# `dg1_mag` is a Python list of tensors, so `dg1_mag*2` would repeat the list
# rather than double the gradients; scale each tensor individually instead.
# The gradients were taken with respect to `test_model.trainable_variables`,
# so they must be paired with exactly those variables. Entries whose gradient
# is None (variable not connected to the target) are skipped.
scaled_grads_and_vars = [
    (grad * 2, var)
    for grad, var in zip(dg1_mag, test_model.trainable_variables)
    if grad is not None
]
optimizer.apply_gradients(scaled_grads_and_vars)

<tf.Variable 'UnreadVariable' shape=() dtype=int64, numpy=1>

In [62]:
# Multiply Gradients with importance factor before application
# The variables themselves are used as the "importance" here, which gives a list
# of the same length and shapes as the gradients.
# NOTE(review): this looks like a stand-in for real importance values -- confirm.
importance=test_model.trainable_variables

def update_gradients(gradients, importance):
    """Scale each gradient element-wise by its matching importance array.

    Args:
        gradients: Sequence of array-like gradients.
        importance: Sequence of array-likes paired with ``gradients`` by
            position; pairing stops at the shorter of the two sequences.

    Returns:
        A new list holding ``gradient * importance`` for every pair. The
        shape of each product is printed as a side effect.
    """
    scaled = [grad * weight for grad, weight in zip(gradients, importance)]
    for product in scaled:
        print(product.shape)
    return scaled

# Apply the scaling to the toy-model gradients; the shapes printed by the call
# are one per gradient tensor.
grad_importance=update_gradients(dg1_mag,importance)

(5, 10)
(10,)
(10, 10)
(10,)


In [43]:
# Inspect the kernel and bias of the stand-alone `layer`.
# NOTE(review): `layer` is not defined by any cell visible in this notebook (its
# only definition is commented out), so this cell depends on leftover kernel
# state and will raise NameError after Restart & Run All.
layer.trainable_variables

[<tf.Variable 'dense_2/kernel:0' shape=(5, 10) dtype=float32, numpy=
 array([[ 0.40520763,  0.01090676,  0.56578726, -0.3407245 ,  0.02358539,
         -0.40111548,  0.13548976, -0.5364721 , -0.03973733, -0.2508733 ],
        [-0.3281536 , -0.22694927, -0.55398303,  0.3664626 , -0.6103132 ,
          0.15862253,  0.068159  , -0.03223365, -0.57953167,  0.22649029],
        [ 0.4044137 , -0.01696028,  0.15214351,  0.2334536 , -0.5133557 ,
          0.21818036, -0.01759515, -0.5542543 ,  0.5166201 , -0.00869151],
        [ 0.1070876 ,  0.32423657,  0.11652444, -0.54368824, -0.28556326,
          0.4796999 , -0.53643286, -0.57631475, -0.3853297 ,  0.41003042],
        [-0.26340643,  0.5624035 , -0.43809342, -0.08927061,  0.3437686 ,
          0.13532296, -0.25898913, -0.44747776, -0.13408372, -0.03619962]],
       dtype=float32)>,
 <tf.Variable 'dense_2/bias:0' shape=(10,) dtype=float32, numpy=
 array([-0.03283736, -0.00368406, -0.03254008, -0.01275273, -0.00168085,
        -0.02191626, -0

In [None]:
# Define Train Step
def step(real_x, real_y):
    """Run one training update of the global ``model`` on a single batch.

    Args:
        real_x: Batch of inputs; must provide ``.reshape`` and is reshaped
            to ``(-1, 64, 64, 64, 3)`` before the forward pass.
        real_y: Targets for the batch, compared to the model output with
            ``tf.keras.losses.mse``.

    Returns:
        None. The update is delegated to ``continual_learning``, which
        receives the trainable variables, their gradients and the global
        ``param_wise_lr``.
    """
    # NOTE(review): the loss is plain MSE on the model output; any
    # regularisation losses attached to the model's layers are not added
    # here -- confirm that is intended.
    voxel_batch = real_x.reshape((-1, 64, 64, 64,3))

    with tf.GradientTape() as tape:
        prediction = model(voxel_batch)
        batch_loss = tf.keras.losses.mse(real_y, prediction)

    trainable = model.trainable_variables
    gradients = tape.gradient(batch_loss, trainable)

    # Apply the update with per-parameter scaling instead of a plain
    # optimizer.apply_gradients call.
    continual_learning(trainable, gradients, param_wise_lr)

In [None]:
#Define Learning Rate Updates for Continual Learning

def continual_learning(model_variables,model_gradients,param_wise_lr,opt=None):
    """Apply gradients after scaling each one by its own factor.

    Args:
        model_variables: Sequence of variables to update.
        model_gradients: Sequence of gradients, one per entry of
            ``model_variables`` and in the same order. ``None`` entries are
            skipped.
        param_wise_lr: Either a list/tuple with exactly one factor per
            variable (each factor must broadcast against that variable's
            gradient), or a single scalar/array applied to every gradient.
        opt: Object exposing ``apply_gradients``. Defaults to the
            notebook-level ``optimizer``.

    Raises:
        ValueError: If the gradients, variables and (when given as a
            list/tuple) factors do not all have the same length.
    """
    variables = list(model_variables)
    gradients = list(model_gradients)
    if len(gradients) != len(variables):
        raise ValueError(
            "Got %d gradients for %d variables" % (len(gradients), len(variables)))

    if isinstance(param_wise_lr, (list, tuple)):
        factors = list(param_wise_lr)
        # A list built per *layer* will not line up with per-*variable*
        # gradients; fail loudly rather than pair them up wrongly.
        if len(factors) != len(variables):
            raise ValueError(
                "param_wise_lr has %d entries but there are %d variables; "
                "supply one factor per variable" % (len(factors), len(variables)))
    else:
        factors = [param_wise_lr] * len(variables)

    # The original unpacked (grad, var) from the gradient list itself and
    # multiplied by the whole factor list; pair the three sequences by position.
    new_grad_vars = [
        (grad * factor, var)
        for grad, factor, var in zip(gradients, factors, variables)
        if grad is not None
    ]

    if opt is None:
        opt = optimizer
    opt.apply_gradients(new_grad_vars)
    

In [None]:
# Training loop
# NOTE(review): `x_train`, `y_train`, `batch_size` and `epochs` are not defined
# in any cell visible in this notebook; they must exist before this cell runs.

# Integer floor division replaces math.floor(...), since `math` is never
# imported in this notebook. Only full batches are used; a trailing partial
# batch is dropped.
bat_per_epoch = len(x_train) // batch_size
for epoch in range(epochs):
    # One '=' per epoch as a minimal progress indicator.
    print('=', end='')
    for i in range(bat_per_epoch):
        n = i*batch_size
        step(x_train[n:n+batch_size], y_train[n:n+batch_size])

In [None]:
# Compile for evaluation if required
# The model is a regressor whose output is a distribution, so it is compiled
# with the same negative log-likelihood loss and mean-absolute-error metric
# that are used when the model is first built. Categorical cross-entropy and
# accuracy are classification measures and do not apply here.
model.compile(
    optimizer=optimizer,
    loss=lambda y, rv_y: -rv_y.log_prob(y),
    metrics=[tf.keras.metrics.MeanAbsoluteError()],
    experimental_run_tf_function=False,
)