In [25]:
import os
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam

In [17]:
# How to build a model: three ways to define the same 4 -> 10 -> 10 -> 2 network.

# method 1: hand the complete layer list to the Sequential constructor
model1 = keras.Sequential([
    Input(shape=(4, )),  # input spec: the batch dimension is not included
    # alternative: Dense(10, activation='relu', input_shape=(4, )) as the first layer
    Dense(10, activation='relu'),   # 1st hidden layer
    Dense(10, activation='relu'),   # 2nd hidden layer
    Dense(2, activation='softmax')  # output layer
])
# summary() prints the table itself and returns None, so wrapping it in
# print() only appends a stray "None" line to the cell output.
model1.summary()

# method 2: start from an empty Sequential and add layers one at a time
model2 = keras.Sequential()
model2.add(Input(shape=(4, )))  # input spec
model2.add(Dense(10, activation='relu'))
model2.add(Dense(10, activation='relu'))
model2.add(Dense(2, activation='softmax'))
# model2.summary()

# method 3: functional API - call each layer on the previous tensor
# NOTE(review): `input` shadows the Python builtin of the same name; the name
# is kept here so any later cell that refers to it keeps working.
input = Input(shape=(4, ))  # input spec
hidden1 = Dense(10, activation='relu')(input)
hidden2 = Dense(10, activation='relu')(hidden1)
output = Dense(2, activation='softmax')(hidden2)

model3 = keras.Model(inputs=input, outputs=output)
# model3.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_30 (Dense)            (None, 10)                50        
                                                                 
 dense_31 (Dense)            (None, 10)                110       
                                                                 
 dense_32 (Dense)            (None, 2)                 22        
                                                                 
Total params: 182 (728.00 Byte)
Trainable params: 182 (728.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


In [15]:
# Compile the models: each one is given its own Adam optimizer instance
# (learning_rate=0.001); no loss or metrics are configured here.
for _model in (model1, model2, model3):
    _model.compile(optimizer=Adam(learning_rate=0.001))

In [18]:
# extract the weights from a model
# `weights` is a list of tf.Variable objects; as the output below shows, each
# Dense layer contributes a kernel followed by a bias.
w = model1.weights
w

[<tf.Variable 'dense_30/kernel:0' shape=(4, 10) dtype=float32, numpy=
 array([[ 0.09325033, -0.20294166,  0.09878844,  0.64578044, -0.63944006,
          0.59449995, -0.20723858,  0.20183253, -0.5635318 ,  0.34384525],
        [-0.38034323, -0.27349865,  0.563905  ,  0.2688073 , -0.47609037,
         -0.2881711 , -0.17242974, -0.23937953, -0.31050965, -0.03825796],
        [ 0.1402908 ,  0.4667977 , -0.5144813 ,  0.6535865 , -0.49980918,
         -0.16105825,  0.07418245,  0.4692515 ,  0.55717576, -0.07125938],
        [ 0.51424706,  0.5204617 , -0.25585568,  0.6091745 , -0.29669598,
          0.55733025, -0.23244232,  0.02863443,  0.5796647 , -0.1983588 ]],
       dtype=float32)>,
 <tf.Variable 'dense_30/bias:0' shape=(10,) dtype=float32, numpy=array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)>,
 <tf.Variable 'dense_31/kernel:0' shape=(10, 10) dtype=float32, numpy=
 array([[ 0.00877482, -0.5467702 , -0.20952618,  0.15147287,  0.47414947,
         -0.38977093, -0.2840522 ,

In [22]:
# Same inspection for model2; note that this rebinds `w`.
w = model2.weights
w

[<tf.Variable 'dense_33/kernel:0' shape=(4, 10) dtype=float32, numpy=
 array([[ 0.35586226, -0.33619928, -0.02299941, -0.5222022 , -0.535032  ,
          0.5360223 ,  0.01422513, -0.34894523, -0.4169281 ,  0.1286267 ],
        [-0.03263259, -0.5042227 ,  0.12080592,  0.45262587, -0.5033421 ,
          0.3105017 , -0.01988155,  0.1636414 ,  0.08690733,  0.08733904],
        [-0.22268224, -0.15547273, -0.44754276, -0.3551866 , -0.24212393,
         -0.23959151, -0.6357609 ,  0.09109968,  0.5155623 , -0.08088726],
        [ 0.5672389 , -0.08094394,  0.47193754,  0.39443612,  0.6393901 ,
         -0.2700722 , -0.1607717 , -0.4621228 ,  0.4997052 ,  0.45624936]],
       dtype=float32)>,
 <tf.Variable 'dense_33/bias:0' shape=(10,) dtype=float32, numpy=array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)>,
 <tf.Variable 'dense_34/kernel:0' shape=(10, 10) dtype=float32, numpy=
 array([[-0.44231752,  0.37223488, -0.09476244, -0.43646434, -0.2377944 ,
          0.24874872, -0.18379813,

In [21]:
type(w[0])  # concrete class of the first entry of the weights list

tensorflow.python.ops.resource_variable_ops.ResourceVariable

In [23]:

# =============================== REPLAY BUFFER ===============================
class ReplayBuffer:
    """Fixed-capacity circular store of (state, action, reward, new_state, done) transitions.

    Storage is preallocated as zero-filled NumPy arrays. Once `max_size`
    transitions have been written, each new transition overwrites the oldest one.
    """

    def __init__(self, max_size, state_shape, n_actions):
        """Preallocate the buffer.

        Args:
            max_size: maximum number of transitions kept at any time.
            state_shape: iterable of ints giving the shape of a single state.
            n_actions: length of a single action vector.
        """
        self.mem_size = max_size
        self.mem_cntr = 0  # total number of transitions stored so far (never wraps)

        self.state_memory     = np.zeros((self.mem_size, *state_shape))
        self.action_memory    = np.zeros((self.mem_size, n_actions))
        self.reward_memory    = np.zeros(self.mem_size)
        self.new_state_memory = np.zeros((self.mem_size, *state_shape))
        # The `np.bool` alias was removed in NumPy 1.24 (AttributeError);
        # the builtin `bool` yields exactly the same array dtype.
        self.terminal_memory  = np.zeros(self.mem_size, dtype=bool)

    def store_transition(self, state, action, reward, new_state, done):
        """Write one transition into the next slot, wrapping around when full."""
        index = self.mem_cntr % self.mem_size  # circular index: oldest slot is reused first

        self.state_memory[index]     = state
        self.action_memory[index]    = action
        self.reward_memory[index]    = reward
        self.new_state_memory[index] = new_state
        self.terminal_memory[index]  = done  # cast to bool by NumPy on assignment

        self.mem_cntr += 1

    def sample_buffer(self, batch_size):
        """Draw `batch_size` distinct stored transitions uniformly at random.

        Returns:
            Tuple `(states, actions, rewards, new_states, dones)` of arrays
            whose first axis has length `batch_size`.

        Raises:
            ValueError: raised by `np.random.choice` when `batch_size` exceeds
                the number of transitions currently stored.
        """
        max_mem = min(self.mem_cntr, self.mem_size)  # only sample slots that were written

        # replace=False -> within one batch no transition is sampled more than once
        batch = np.random.choice(max_mem, batch_size, replace=False)

        states     = self.state_memory[batch]
        actions    = self.action_memory[batch]
        rewards    = self.reward_memory[batch]
        new_states = self.new_state_memory[batch]
        dones      = self.terminal_memory[batch]

        return states, actions, rewards, new_states, dones



# =============================== CRITIC NETWORK ===============================
class CriticNetwork(keras.Model):
    """State-action value network: two ReLU hidden layers and one linear scalar output.

    Args:
        name: label used only to build the checkpoint file name; it is not
            forwarded to the `keras.Model` base class.
        fc1_dims: number of units in the first hidden layer.
        fc2_dims: number of units in the second hidden layer.
        chkpt_dir: directory under which the checkpoint file path is built
            (the directory is not created here).
    """

    def __init__(
            self,
            name="critic", # label used for the checkpoint file name
            fc1_dims=512,
            fc2_dims=512,
            chkpt_dir='tmp/ddpg/'
    ):
        super(CriticNetwork, self).__init__()

        # Stored under its own attribute; the original author advises against
        # using 'self.model' as an attribute name on a Keras model.
        self.model_name = name
        self.checkpoint_dir  = chkpt_dir
        # The model name is a file-name prefix, not a path component:
        # '<chkpt_dir>/<name>_ddpg.h5'. Passing it as a separate argument to
        # os.path.join would yield '<chkpt_dir>/<name>/_ddpg.h5' instead.
        self.checkpoint_file = os.path.join(self.checkpoint_dir, self.model_name + '_ddpg.h5')
        # extensions for saving keras models: legacy '.h5' -> TF 1.X, '.tf' -> TF 2.X

        self.fc1_dims = fc1_dims
        self.fc2_dims = fc2_dims

        # define network layers
        self.fc1 = Dense(self.fc1_dims, activation='relu')
        self.fc2 = Dense(self.fc2_dims, activation='relu')
        self.q   = Dense(1, activation=None)  # linear output: one value per sample

    def call(self, state, action):
        """Return the value estimate for each (state, action) pair.

        `state` and `action` are concatenated along axis 1, so both need a
        leading batch axis of equal length.
        """
        # axis 0 is the batch dimension, so features are joined along axis 1
        temp1 = self.fc1(tf.concat([state, action], axis=1))
        # NOTE: according to the DDPG paper, actions were not included until the
        # 2nd hidden layer of Q; here they enter at the first layer.
        temp2 = self.fc2(temp1)
        q_value = self.q(temp2)

        return q_value

# ================================ ACTOR NETWORK ===============================
class ActorNetwork(keras.Model):
    """Deterministic policy network: two ReLU hidden layers and a tanh output layer.

    Args:
        name: label used only to build the checkpoint file name; it is not
            forwarded to the `keras.Model` base class.
        n_actions: dimensionality of the action vector produced per state.
        fc1_dims: number of units in the first hidden layer.
        fc2_dims: number of units in the second hidden layer.
        chkpt_dir: directory under which the checkpoint file path is built
            (the directory is not created here).
    """

    def __init__(
            self,
            name="actor", # label used for the checkpoint file name
            n_actions=2, # action shape (dimensionality of action space)
            fc1_dims=512,
            fc2_dims=512,
            chkpt_dir='tmp/ddpg/'
    ):
        super(ActorNetwork, self).__init__()

        # Stored under its own attribute; the original author advises against
        # using 'self.model' as an attribute name on a Keras model.
        self.model_name = name
        self.checkpoint_dir  = chkpt_dir
        # The model name is a file-name prefix, not a path component:
        # '<chkpt_dir>/<name>_ddpg.h5'. Passing it as a separate argument to
        # os.path.join would yield '<chkpt_dir>/<name>/_ddpg.h5' instead.
        self.checkpoint_file = os.path.join(self.checkpoint_dir, self.model_name + '_ddpg.h5')

        self.n_actions = n_actions
        self.fc1_dims = fc1_dims
        self.fc2_dims = fc2_dims

        # define network layers
        self.fc1 = Dense(self.fc1_dims,  activation='relu')
        self.fc2 = Dense(self.fc2_dims,  activation='relu')
        self.mu  = Dense(self.n_actions, activation='tanh') # tanh bounds each action component to (-1, 1)

    def call(self, state):
        """Map a batch of states to a batch of actions with components in (-1, 1)."""
        temp1  = self.fc1(state)
        temp2  = self.fc2(temp1)
        action = self.mu(temp2)

        return action


In [34]:
# Smoke test: push a random batch of states through a small actor and critic.
actor = ActorNetwork(n_actions=3, fc1_dims=10, fc2_dims=10)
critic = CriticNetwork(fc1_dims=10, fc2_dims=10)
state = np.random.rand(10, 3).astype(np.float32)  # batch of 10 states, 3 features each
action = actor(state)  # the actor is called before summary() is requested
# summary() prints the table itself and returns None, so wrapping it in
# print() only appends a stray "None" line to the cell output.
actor.summary()
print("action:", action)
value = critic(state, action)
print("value:", value)

Model: "actor_network_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_78 (Dense)            multiple                  40        
                                                                 
 dense_79 (Dense)            multiple                  110       
                                                                 
 dense_80 (Dense)            multiple                  33        
                                                                 
Total params: 183 (732.00 Byte)
Trainable params: 183 (732.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
action: tf.Tensor(
[[-0.13529369 -0.0278967   0.16720283]
 [-0.14543913 -0.08192513  0.1533877 ]
 [-0.22853766 -0.07132147  0.26780415]
 [-0.3539786  -0.15822871  0.39385724]
 [-0.3206752  -0.14606568  0.37063336]
 [-0.19451684 -0.0793639   0.2327903 ]
 [-0.25600296 -0.11

In [43]:
# Convert a NumPy integer range into a uint8 tensor, then squeeze it.
# The tensor has shape (10,), i.e. no size-1 axes, so squeeze leaves it unchanged.
value_ = np.arange(10)
print(value_)

value_tensor = tf.convert_to_tensor(value_, dtype=np.uint8)
print(value_tensor)

tf.squeeze(value_tensor)

[0 1 2 3 4 5 6 7 8 9]
tf.Tensor([0 1 2 3 4 5 6 7 8 9], shape=(10,), dtype=uint8)


<tf.Tensor: shape=(10,), dtype=uint8, numpy=array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8)>

In [56]:
# tf.GradientTape(): operations on `x` executed inside the `with` block are
# recorded so that `y` can be differentiated with respect to `x` afterwards.
x = tf.Variable(1.0)

with tf.GradientTape() as tape:
    y = x**3 + 2*x**2 - x + 1
dy_dx = tape.gradient(y, x)  # analytically dy/dx = 3x^2 + 4x - 1, which is 6 at x = 1
print(dy_dx)
# NOTE(review): the tape is created with default arguments; a second
# gradient() call on it (below) is presumably expected to fail - confirm.
# dy_dx = tape.gradient(y, x)

tf.Tensor(6.0, shape=(), dtype=float32)


In [54]:
# Trainable vs. non-trainable weights, illustrated with a BatchNormalization layer.

layer = tf.keras.layers.BatchNormalization()
layer.build((None, 4))  # building the layer creates its weights

# Report how many variables fall into each category.
for _label, _variables in (
    ("weights:", layer.weights),
    ("trainable_weights:", layer.trainable_weights),
    ("non_trainable_weights:", layer.non_trainable_weights),
):
    print(_label, len(_variables))


weights: 4
trainable_weights: 2
non_trainable_weights: 2
