In [432]:
import gym
import popgym
import numpy as np
from popgym.envs.stateless_cartpole import StatelessCartPole
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.optimizers import Adam
import random
from tqdm import trange


In [433]:
import tensorflow as tf
import numpy as np
import tqdm

In [434]:
# List the GPU devices TensorFlow can see and report how many were found.
physical_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs:", len(physical_devices))

Num GPUs: 1


In [435]:
# Hyperparameters read by DQNSolver (and LEARNING_RATE also by the autoencoder optimizer).
GAMMA = 0.95  # discount applied to the bootstrapped next-state value
LEARNING_RATE = 0.001  # Adam step size

MEMORY_SIZE = 1000000  # maximum number of transitions kept in the replay deque
BATCH_SIZE = 10  # transitions sampled per experience_replay call

# Epsilon-greedy schedule: starts at MAX, multiplied by DECAY after each replay, floored at MIN.
EXPLORATION_MAX = 1.0
EXPLORATION_MIN = 0.01
EXPLORATION_DECAY = 0.9

In [436]:
# Dimensions shared by the models defined below.
state_space=4  # length of the state vector fed to the DQN and emitted by the Encoder
obs_space=2  # length of each observation vector the Decoder predicts
action_space=2  # number of discrete actions / Q-value outputs
intermediate_dim=16  # hidden-layer width of the Encoder and Decoder

In [437]:
# Environment instance; data_collection reads it as a global.
env=StatelessCartPole()


In [438]:
# Inspect the action space and the length of the declared observation vector.
print(env.action_space)
print(env.observation_space.shape[0])

Discrete(2)
2


In [439]:
class DQNSolver:
    """Epsilon-greedy DQN agent with a replay memory and a separate target network.

    The online network (`model`) is trained from sampled transitions; the
    target network (`target_model`) supplies the bootstrapped next-state
    values and is refreshed through `update_target_model`.
    """

    def __init__(self, state_space, action_space):
        """
        Builds the replay memory and both Q-networks.
        :param state_space: length of the state vector fed to the networks
        :param action_space: number of discrete actions (network outputs)
        """
        self.exploration_rate = EXPLORATION_MAX

        self.action_space = action_space
        self.memory = deque(maxlen=MEMORY_SIZE)

        self.model = self._build_network(state_space, action_space)
        self.target_model = self._build_network(state_space, action_space)
        # Start with both networks in agreement; otherwise the first replay
        # bootstraps from an unrelated random initialisation.
        self.update_target_model()

    @staticmethod
    def _build_network(state_space, action_space):
        """
        Creates one compiled Q-network: Dense(8) and Dense(4) ReLU layers, each
        followed by batch normalisation, then a linear output per action.
        :return: the compiled Sequential model
        """
        network = Sequential()
        network.add(Dense(8, input_shape=(state_space,), activation="relu"))
        network.add(BatchNormalization())
        network.add(Dense(4, activation="relu"))
        network.add(BatchNormalization())
        network.add(Dense(action_space, activation="linear"))
        # `learning_rate` is the supported keyword; the `lr` alias is deprecated.
        network.compile(loss="mse", optimizer=Adam(learning_rate=LEARNING_RATE))
        return network

    def remember(self, state, action, reward, next_state, done):
        """
        Stores one transition in the replay memory.
        :return: None
        """
        self.memory.append((state, action, reward, next_state, done))

    def forget(self):
        """
        Empties the replay memory.
        :return: None
        """
        self.memory.clear()

    def act(self, state):
        """
        Chooses an action epsilon-greedily: a random action with probability
        `exploration_rate`, otherwise the argmax of the online network's
        Q-values for the first row of `state`.
        :return: the chosen action index
        """
        if np.random.rand() < self.exploration_rate:
            return random.randrange(self.action_space)
        # verbose=0 keeps per-call progress bars out of the notebook output.
        q_values = self.model.predict(state, verbose=0)
        return np.argmax(q_values[0])

    def update_target_model(self):
        """
        Updates the current target_q_net with the q_net which brings all the
        training in the q_net to the target_q_net.
        :return: None
        """
        self.target_model.set_weights(self.model.get_weights())

    def experience_replay(self):
        """
        Samples BATCH_SIZE transitions and fits the online network on each one,
        then decays the exploration rate once. Does nothing while fewer than
        BATCH_SIZE transitions are stored.
        :return: None
        """
        if len(self.memory) < BATCH_SIZE:
            return
        batch = random.sample(self.memory, BATCH_SIZE)
        # NOTE(review): each fit call appears to see a single sample (callers
        # in this notebook store states shaped (1, state_space)); batch-norm
        # statistics over one sample are degenerate - consider fitting the
        # whole minibatch at once.
        for state, action, reward, state_next, terminal in batch:
            q_update = reward
            if not terminal:
                # Bootstrap from the target network's best next-state value.
                q_update = (reward + GAMMA *
                            np.amax(self.target_model.predict(state_next, verbose=0)[0]))
            q_values = self.model.predict(state, verbose=0)
            # Only the taken action's Q-value is moved towards the target.
            q_values[0][int(action)] = q_update
            self.model.fit(state, q_values, verbose=0)
        self.exploration_rate *= EXPLORATION_DECAY
        self.exploration_rate = max(EXPLORATION_MIN, self.exploration_rate)

In [440]:
class Encoder(tf.keras.layers.Layer):
  """Two ReLU hidden layers followed by a linear projection to `state_space` units."""

  def __init__(self,state_space, intermediate_dim):
    super().__init__()
    dense = tf.keras.layers.Dense
    relu_he = dict(activation=tf.nn.relu, kernel_initializer='he_uniform')
    # Input width is fixed at 7; the notebook feeds rows built by concatenating
    # a state, an observation and an action.
    self.input_layer = tf.keras.layers.InputLayer(input_shape=(7,))
    self.hidden_layer1 = dense(units=intermediate_dim, **relu_he)
    self.hidden_layer2 = dense(units=intermediate_dim, **relu_he)
    self.output_layer = dense(
      units=state_space,
      activation=tf.keras.activations.linear
    )

  def call(self, input_features):
    """Run `input_features` through the input layer, both hidden layers and the output layer."""
    hidden = self.input_layer(input_features)
    for layer in (self.hidden_layer1, self.hidden_layer2):
      hidden = layer(hidden)
    return self.output_layer(hidden)

In [441]:
# Create the DQN agent for state_space-sized inputs and action_space actions.
dqn_solver=DQNSolver(state_space, action_space)




In [442]:
class Decoder(tf.keras.layers.Layer):
  """Predicts an `obs_space`-sized vector from a code concatenated with `var`.

  Two ReLU hidden layers of `intermediate_dim` units feed a linear output
  layer of `obs_space` units.
  """

  def __init__(self, intermediate_dim, obs_space):
    super().__init__()
    self.hidden_layer1 = tf.keras.layers.Dense(
      units=intermediate_dim,
      activation=tf.nn.relu,
      kernel_initializer='he_uniform'
    )
    self.hidden_layer2 = tf.keras.layers.Dense(
      units=intermediate_dim,
      activation=tf.nn.relu,
      kernel_initializer='he_uniform'
    )
    self.output_layer = tf.keras.layers.Dense(
      units=obs_space,
      activation=tf.keras.activations.linear
    )

  def call(self, input_features,var):
    """Concatenate `input_features` with `var` on the feature axis and decode.

    Both arguments must share every dimension except the last. Joining on the
    last axis handles the rank-2 (batch, features) and rank-3
    (batch, 1, features) layouts in a single op, with no Python loop over the
    batch.
    """
    decoder_input = tf.concat((input_features, var), axis=-1)
    activation1 = self.hidden_layer1(decoder_input)
    activation2 = self.hidden_layer2(activation1)
    return self.output_layer(activation2)

In [443]:
class Autoencoder(tf.keras.Model):
  """Encoder followed by a Decoder that also receives an extra `var` input."""

  def __init__(self, intermediate_dim, state_space,obs_space):
    super().__init__()
    self.encoder = Encoder(state_space, intermediate_dim)
    self.decoder = Decoder(intermediate_dim, obs_space)

  def call(self, input_features,var):
    """Encode `input_features`, then decode the resulting code together with `var`."""
    return self.decoder(self.encoder(input_features), var)


# Adam optimizer that the autoencoder training loops hand to train().
opt = tf.optimizers.Adam(learning_rate=LEARNING_RATE)

In [444]:

def loss(model,input_features, obs,var):
  """Mean squared error between `model(input_features, var)` and `obs`."""
  prediction = model(input_features, var)
  return tf.reduce_mean(tf.square(prediction - obs))
  
def train(loss, model, opt,input_features,obs,var):
  """Apply one optimizer step to `model` and return the loss it was taken on.

  Args:
    loss: callable `(model, input_features, obs, var) -> scalar tensor`.
    model: model whose `trainable_variables` are updated.
    opt: optimizer exposing `apply_gradients`.
    input_features, obs, var: forwarded unchanged to `loss`.

  Returns:
    The loss value computed before the update.
  """
  # Record only the forward pass; the gradient is taken after the tape closes
  # so the backward ops themselves are not traced.
  with tf.GradientTape() as tape:
    loss_value = loss(model, input_features, obs, var)
  gradients = tape.gradient(loss_value, model.trainable_variables)
  opt.apply_gradients(zip(gradients, model.trainable_variables))
  return loss_value

In [445]:
# Reset the environment and display the value it returns.
obs=env.reset()
obs

array([ 0.02763797, -0.00338564,  0.03090805,  0.0018308 ], dtype=float32)

In [446]:
# Instantiate the autoencoder with the dimensions configured earlier.
autoencoder = Autoencoder(
  intermediate_dim=intermediate_dim,
  state_space=state_space,obs_space=obs_space
)


In [447]:
def data_collection(total_episodes,dqn_solver,autoencoder):
    """Roll out `total_episodes` episodes and record step-aligned trajectories.

    The first state of every episode is the value returned by `env.reset()`;
    every later state is produced by the autoencoder's encoder from the
    previous state, the latest observation and the previous action. Uses the
    globals `env`, `state_space` and `obs_space`.

    Index convention (k counts steps across all episodes):
      * S_series[k], U_series[k], R_series[k], Done_series[k] belong to step k.
      * O_series[k+1] / O_predicted_series[k+1] are the observation returned by
        step k and the decoder's prediction for it; index 0 of both lists is a
        zero placeholder.
      * S_actual_series holds the reset state of each episode followed by the
        `actual_state` returned by each step, all shaped (1, state_space).

    Returns:
        (S_series, O_series, U_series, R_series, O_predicted_series,
         S_actual_series, Done_series)
    """
    S_series=[]
    S_actual_series=[]
    U_series=[]
    R_series=[]
    Done_series=[]
    # Zero placeholders so that entry k+1 lines up with step k.
    O_series=[np.zeros((1,obs_space))]
    O_predicted_series=[np.zeros((1,obs_space))]
    encoder=autoencoder.encoder
    decoder=autoencoder.decoder

    i=0  # step counter running across all episodes
    for _ in range(total_episodes):
        s0=env.reset()
        step=0
        done=False
        while not done:
            if step==0:
                # Reshape before storing so every S_actual_series entry has the
                # same (1, state_space) shape.
                s=np.reshape(s0,[1,state_space])
                S_actual_series.append(s)
            else:
                s=encoder(tf.concat((S_series[i-1],O_series[i],U_series[i-1]),axis=1))
                s=np.reshape(s,[1,state_space])
            S_series.append(s)
            action=dqn_solver.act(s)

            # env.step is unpacked into five values here, the first being the
            # state stored in S_actual_series.
            actual_state,obs, reward, done, info = env.step(int(action))
            Done_series.append(done)
            S_actual_series.append(np.reshape(actual_state,[1,state_space]))
            action=np.array([[action]]).astype('float32')
            U_series.append(action)
            O_series.append(np.reshape(obs,[1,obs_space]))

            # Rewards are scaled by 200 and the terminating step's reward is negated.
            reward=reward*200
            R_series.append(-reward if done else reward)

            # Stored as float64, matching the zero placeholder at index 0.
            obs_pred=np.reshape(decoder(s,action),[1,obs_space]).astype(np.float64)
            O_predicted_series.append(obs_pred)
            i+=1
            step+=1
    return S_series,O_series,U_series,R_series,O_predicted_series,S_actual_series,Done_series


In [448]:
def autoencoder_training(epochs,autoencoder,S_series,O_series,U_series):
    """Train the autoencoder one sample at a time for `epochs` passes over the series.

    For each index t in [2, len(S_series)-1) the encoder input is the
    concatenation of S_series[t-1], O_series[t] and U_series[t-1]; the target
    is O_series[t+1] and U_series[t] is forwarded as the decoder's extra input.
    """
    for _ in trange(epochs):
        for t in range(2, len(S_series) - 1):
            encoder_input = tf.concat(
                (S_series[t - 1], O_series[t], U_series[t - 1]), axis=1)
            train(loss, autoencoder, opt, encoder_input, O_series[t + 1], U_series[t])



In [449]:
def dqn_training(dqn_solver,epochs,S_series,U_series,R_series,Done_series):
    """Refill the solver's replay memory from the series and run `epochs` replay rounds.

    The memory is cleared first. Every state except the last is stored with its
    action, reward, successor state and done flag. The target network is
    refreshed after every even-numbered round.
    """
    dqn_solver.forget()
    for idx, state in enumerate(S_series[:-1]):
        dqn_solver.remember(
            state,
            U_series[idx][0][0],
            R_series[idx],
            S_series[idx + 1],
            Done_series[idx],
        )
    for replay_round in trange(epochs):
        dqn_solver.experience_replay()
        if replay_round % 2 == 0:
            dqn_solver.update_target_model()

In [450]:
# Collect 10 episodes of rollouts for the inspection cells that follow.
S_series,O_series,U_series,R_series,O_predicted_series,S_actual_series,Done_series=data_collection(10,dqn_solver,autoencoder)


In [451]:
# Check the container type of a recorded action.
type(U_series[0])


numpy.ndarray

In [452]:
# Print the first recorded state, observation and action, then the
# concatenated encoder input for index 2.
print(S_series[0])
print(O_series[1])
print(U_series[0])

print(tf.concat((S_series[2-1],O_series[2],U_series[2-1]),axis=1))

[[-0.00490154 -0.02971714  0.00840055 -0.0288267 ]]
[[-0.00549588  0.00782401]]
[[0.]]
tf.Tensor(
[[-0.00297406  0.00465151  0.03123135  0.00937326 -0.00999505  0.01315391
   0.        ]], shape=(1, 7), dtype=float32)


In [453]:
#U_series=[float(i) for i in U_series]
# S_dataset=tf.data.Dataset.from_tensor_slices(S_series[1:-2]).batch(10)
# O_dataset=tf.data.Dataset.from_tensor_slices(O_series[2:-1]).batch(10)
# U_dataset=tf.data.Dataset.from_tensor_slices(U_series[1:-2]).batch(10)
# O_result_dataset=tf.data.Dataset.from_tensor_slices(O_series[3:]).batch(10)
# var_dataset=tf.data.Dataset.from_tensor_slices(U_series[2:-1]).batch(10)



In [454]:
# Single forward pass through the autoencoder on the first recorded step.
autoencoder(tf.concat((S_series[1-1],O_series[1],U_series[1-1]),axis=1),U_series[1])

<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[-0.00310687, -0.0075161 ]], dtype=float32)>

In [455]:
# Build batched tf.data datasets for autoencoder training. For each index i:
#   temp1 -> encoder input: S_series[i-1], O_series[i], U_series[i-1] joined on axis 1
#   temp2 -> target: the following observation O_series[i+1]
#   temp3 -> action U_series[i], later passed as the decoder's extra input
temp1=[]
temp2=[]
temp3=[]
for i in range(2,len(S_series)-1):
    temp1.append(tf.concat((S_series[i-1],O_series[i],U_series[i-1]),axis=1))
    temp2.append(O_series[i+1])
    temp3.append(U_series[i])
input_features_dataset=tf.data.Dataset.from_tensor_slices(temp1).batch(10)
O_actual_dataset=tf.data.Dataset.from_tensor_slices(temp2).batch(10)
var_dataset=tf.data.Dataset.from_tensor_slices(temp3).batch(10)
    

In [456]:
# Peek at the first batch of encoder inputs.
for x in input_features_dataset:
    print(x.numpy())
    break

[[[-0.00297406  0.00465151  0.03123135  0.00937326 -0.00999505
    0.01315391  0.        ]]

 [[ 0.00842325  0.02351869 -0.00804043  0.00349187 -0.01839888
    0.02438661  0.        ]]

 [[ 0.01398245  0.03210745  0.0052354   0.00871718 -0.03070879
    0.04155527  0.        ]]

 [[ 0.02796906  0.0569975   0.00841101  0.0215654  -0.0469276
    0.06472894  1.        ]]

 [[ 0.39902413  0.19259483  0.47200924  0.864065   -0.05925529
    0.08231522  0.        ]]

 [[ 0.4302613   0.74473345  0.16851896  0.3302306  -0.07550175
    0.1061477   1.        ]]

 [[-0.05958858  0.61564875  1.031866    1.2448192  -0.08786892
    0.1246644   0.        ]]

 [[ 0.9139001   1.295408    0.05375206  0.48692682 -0.10416375
    0.14966242  0.        ]]

 [[ 0.07027406  1.5104017   1.297411    0.8471776  -0.12438817
    0.1812402   1.        ]]

 [[ 0.1944651   1.2958391   1.5167856   1.9622836  -0.14075148
    0.20796777  1.        ]]]


In [457]:
# Zip inputs with targets and print the target half of the first batch.
x=tf.data.Dataset.zip((input_features_dataset, O_actual_dataset))
for y in x:
    print(y[1])
    break

tf.Tensor(
[[[-0.01839888  0.02438661]]

 [[-0.03070879  0.04155527]]

 [[-0.0469276   0.06472894]]

 [[-0.05925529  0.08231522]]

 [[-0.07550175  0.1061477 ]]

 [[-0.08786892  0.1246644 ]]

 [[-0.10416375  0.14966242]]

 [[-0.12438817  0.1812402 ]]

 [[-0.14075148  0.20796777]]

 [[-0.15326606  0.23007672]]], shape=(10, 1, 2), dtype=float32)


In [458]:
# Pull the first batch of target observations through an explicit iterator.
it=iter(O_actual_dataset)
print(next(it))

tf.Tensor(
[[[-0.01839888  0.02438661]]

 [[-0.03070879  0.04155527]]

 [[-0.0469276   0.06472894]]

 [[-0.05925529  0.08231522]]

 [[-0.07550175  0.1061477 ]]

 [[-0.08786892  0.1246644 ]]

 [[-0.10416375  0.14966242]]

 [[-0.12438817  0.1812402 ]]

 [[-0.14075148  0.20796777]]

 [[-0.15326606  0.23007672]]], shape=(10, 1, 2), dtype=float32)


In [459]:
# Each element is a (encoder input, target observation, decoder action) batch triple.
final_dataset=tf.data.Dataset.zip((input_features_dataset, O_actual_dataset,var_dataset))

In [460]:
# Dataset.concatenate chains the two datasets end-to-end (it does not join
# features), so the first element is still an encoder-input batch.
temp_dataset=input_features_dataset.concatenate(var_dataset)
for x in temp_dataset:
    print(x)
    break

tf.Tensor(
[[[-0.00297406  0.00465151  0.03123135  0.00937326 -0.00999505
    0.01315391  0.        ]]

 [[ 0.00842325  0.02351869 -0.00804043  0.00349187 -0.01839888
    0.02438661  0.        ]]

 [[ 0.01398245  0.03210745  0.0052354   0.00871718 -0.03070879
    0.04155527  0.        ]]

 [[ 0.02796906  0.0569975   0.00841101  0.0215654  -0.0469276
    0.06472894  1.        ]]

 [[ 0.39902413  0.19259483  0.47200924  0.864065   -0.05925529
    0.08231522  0.        ]]

 [[ 0.4302613   0.74473345  0.16851896  0.3302306  -0.07550175
    0.1061477   1.        ]]

 [[-0.05958858  0.61564875  1.031866    1.2448192  -0.08786892
    0.1246644   0.        ]]

 [[ 0.9139001   1.295408    0.05375206  0.48692682 -0.10416375
    0.14966242  0.        ]]

 [[ 0.07027406  1.5104017   1.297411    0.8471776  -0.12438817
    0.1812402   1.        ]]

 [[ 0.1944651   1.2958391   1.5167856   1.9622836  -0.14075148
    0.20796777  1.        ]]], shape=(10, 1, 7), dtype=float32)


In [461]:
# for x in final_dataset:
#     train(loss,autoencoder,opt,x[0],x[1],x[2])

In [474]:
# Debug cell: encode the first batch, print its length and ranks, then
# concatenate each encoded sample with its action and print the stacked result.
for x in final_dataset:
    
    #print(autoencoder.encoder(x[0]))
    # input_features_dataset_temp=tf.data.Dataset.from_tensor_slices(autoencoder.encoder(x[0]))
    # var_dataset_temp=tf.data.Dataset.from_tensor_slices(x[2])
    z=autoencoder.encoder(x[0])
    print(len(z))
    print(len(z.get_shape()))
    print(len(z[0].get_shape()))
    temp=[]
    # Hard-coded 10 matches the .batch(10) used to build the datasets.
    for i in range(10):
        
        temp.append(tf.concat((z[i],x[2][i]),axis=1))
    temp=tf.convert_to_tensor(temp)
    print(temp)
   
        
    break

10
3
2
tf.Tensor(
[[[-0.43772095 -0.19160041 -0.43179038 -0.2916193   0.        ]]

 [[-0.38426065 -0.23010026 -0.4423437  -0.22566229  0.        ]]

 [[-0.38148636 -0.24775627 -0.44835502 -0.21562107  1.        ]]

 [[-0.40194404  0.2634282  -0.00629877  0.12507504  0.        ]]

 [[-1.0111655   0.2361565  -0.08313033 -0.82170755  1.        ]]

 [[-1.0894464   0.14591056 -0.26365077  0.00449681  0.        ]]

 [[-0.69270223  0.13748094  0.03252822 -0.37928012  0.        ]]

 [[-0.51916474 -0.02602741 -0.4224973  -0.5391094   1.        ]]

 [[-0.5657909   0.20481834  0.24166316  0.84901935  1.        ]]

 [[-0.86801624  0.7242854   0.1531363   0.20503777  0.        ]]], shape=(10, 1, 5), dtype=float32)


In [463]:
# Running log of total reward per round; complete_loop appends to this global.
rewards_storage=[]

In [464]:
def autoencoder_training_2(epochs,autoencoder,S_series,O_series,U_series):
    """Train the autoencoder for `epochs` passes using batches of 10 samples.

    For each index t in [2, len(S_series)-1) the encoder input is the
    concatenation of S_series[t-1], O_series[t] and U_series[t-1]; the target
    is O_series[t+1] and U_series[t] is forwarded as the decoder's extra input.
    """
    indices = range(2, len(S_series) - 1)
    encoder_inputs = [
        tf.concat((S_series[t - 1], O_series[t], U_series[t - 1]), axis=1)
        for t in indices
    ]
    targets = [O_series[t + 1] for t in indices]
    decoder_actions = [U_series[t] for t in indices]

    def batched(samples):
        # one dataset per stream, sliced per sample and grouped in tens
        return tf.data.Dataset.from_tensor_slices(samples).batch(10)

    final_dataset = tf.data.Dataset.zip(
        (batched(encoder_inputs), batched(targets), batched(decoder_actions)))

    for _ in trange(epochs):
        for features_batch, target_batch, action_batch in final_dataset:
            train(loss, autoencoder, opt, features_batch, target_batch, action_batch)

In [465]:
def complete_loop(autoencoder,dqn_solver,epochs,EXPLORATION_MAX):
    """Alternate data collection with DQN training and autoencoder training.

    Every round collects 100 episodes, prints the summed reward and appends it
    to the global `rewards_storage`. Even rounds run 20 replay rounds of DQN
    training, then reset the solver's exploration rate to the current cap and
    shrink the cap by 5%. Odd rounds run 10 epochs of batched autoencoder
    training.
    """
    exploration_cap = EXPLORATION_MAX
    for epoch in range(epochs):
        rollout = data_collection(100, dqn_solver, autoencoder)
        S_series, O_series, U_series, R_series, _, _, Done_series = rollout
        total_reward = sum(R_series)
        print("epoch: {}, total reward: {}".format(epoch, total_reward))
        rewards_storage.append(total_reward)
        if epoch % 2 != 0:
            autoencoder_training_2(10, autoencoder, S_series, O_series, U_series)
            continue
        dqn_training(dqn_solver, 20, S_series, U_series, R_series, Done_series)
        dqn_solver.exploration_rate = exploration_cap
        exploration_cap *= 0.95

        
        

        


In [466]:
# Run four rounds: even rounds train the DQN, odd rounds train the autoencoder.
complete_loop(autoencoder,dqn_solver,4,EXPLORATION_MAX=1.0)

epoch: 0, total reward: 2264.0


100%|██████████| 20/20 [00:21<00:00,  1.08s/it]


epoch: 1, total reward: 2103.0


100%|██████████| 10/10 [00:42<00:00,  4.28s/it]


epoch: 2, total reward: 1796.0


 40%|████      | 8/20 [00:09<00:14,  1.22s/it]


KeyboardInterrupt: 

In [None]:
# Inspect the autoencoder's weight arrays.
autoencoder.get_weights()

[array([[ 0.20407882, -0.23334798, -0.23918504, -0.5291099 , -0.7303084 ,
          0.87317455,  0.56289804, -0.70826864,  0.07231956,  0.58529794,
         -0.644903  ,  0.72572047, -0.7585182 , -0.23582321, -0.34132308,
          0.34260297],
        [-0.57661504, -0.38892895, -0.22276026,  0.5406464 , -0.38851607,
         -0.36778143,  0.82569045,  0.62966347,  0.49293634,  0.10445686,
         -0.41051292,  0.4032058 ,  0.73918945,  0.75191957, -0.00779168,
          0.17806192],
        [-0.5911713 , -0.6467045 , -0.64828837,  0.5615653 ,  0.3837878 ,
         -0.48843122,  0.2434346 , -0.8506665 , -0.0658987 ,  0.44311896,
         -0.18812814, -0.28044367, -0.65841657, -0.7414347 ,  0.7854104 ,
         -0.14971481],
        [-0.01277739, -0.42608514,  0.4171245 ,  0.5651036 ,  0.15420617,
          0.39194557, -0.58921504,  0.15459795,  0.6758396 , -0.04311546,
          0.13534532, -0.47579995,  0.11670901,  0.1934568 ,  0.5708729 ,
         -0.00474983],
        [ 0.18339984

In [None]:
# Inspect the encoder's weight arrays only.
autoencoder.encoder.get_weights()

[array([[ 0.20407882, -0.23334798, -0.23918504, -0.5291099 , -0.7303084 ,
          0.87317455,  0.56289804, -0.70826864,  0.07231956,  0.58529794,
         -0.644903  ,  0.72572047, -0.7585182 , -0.23582321, -0.34132308,
          0.34260297],
        [-0.57661504, -0.38892895, -0.22276026,  0.5406464 , -0.38851607,
         -0.36778143,  0.82569045,  0.62966347,  0.49293634,  0.10445686,
         -0.41051292,  0.4032058 ,  0.73918945,  0.75191957, -0.00779168,
          0.17806192],
        [-0.5911713 , -0.6467045 , -0.64828837,  0.5615653 ,  0.3837878 ,
         -0.48843122,  0.2434346 , -0.8506665 , -0.0658987 ,  0.44311896,
         -0.18812814, -0.28044367, -0.65841657, -0.7414347 ,  0.7854104 ,
         -0.14971481],
        [-0.01277739, -0.42608514,  0.4171245 ,  0.5651036 ,  0.15420617,
          0.39194557, -0.58921504,  0.15459795,  0.6758396 , -0.04311546,
          0.13534532, -0.47579995,  0.11670901,  0.1934568 ,  0.5708729 ,
         -0.00474983],
        [ 0.18339984

In [None]:
# Inspect the decoder's weight arrays only.
autoencoder.decoder.get_weights()

[array([[ 0.2885225 , -0.89066386,  0.64410186, -0.18113177, -0.0710882 ,
         -0.33205456, -0.0348258 , -1.1914519 , -0.26268044, -0.9786924 ,
         -0.85268843, -0.28047556, -0.44978425,  0.3495759 ,  0.92274004,
          0.00333026],
        [ 0.54400826,  0.17340857,  0.26654753,  0.04388787, -0.4862292 ,
         -0.81445354, -0.64160204,  0.60329294,  0.2624524 ,  0.59104395,
          0.04866523, -0.96086335,  0.75993615,  0.96409595, -0.8663891 ,
          0.6054294 ],
        [-0.00551882, -0.64640725,  0.39964157, -0.6682794 , -1.1363765 ,
         -0.52086073,  0.31894025,  0.11176072, -0.42010298,  0.20078796,
          0.57834214, -0.9793734 ,  1.019054  , -0.8089541 , -1.2127557 ,
         -0.19722524],
        [-0.0797565 ,  0.0869357 ,  0.6336706 ,  0.01991878,  0.80748004,
         -0.85445994,  0.30480716, -0.83834416,  0.4656608 ,  0.50081897,
          0.46245548,  0.7759139 , -0.5118521 , -0.44874564,  0.24810715,
         -0.23923713],
        [-1.050454  

In [None]:
# NOTE(review): the Decoder class in this notebook has its batchnorm_layer1
# assignment commented out, so this lookup presumably fails on a fresh kernel;
# the output below looks like it came from an earlier version of the class -
# confirm before re-running.
autoencoder.decoder.batchnorm_layer1.weights

[<tf.Variable 'decoder/batch_normalization_6/gamma:0' shape=(16,) dtype=float32, numpy=
 array([0.93840057, 0.8926945 , 0.66052014, 0.8010094 , 1.0011221 ,
        0.74727535, 0.8526781 , 0.73844445, 0.75014347, 0.69470066,
        0.7478063 , 0.885031  , 0.77848   , 0.80480886, 0.69570357,
        0.7179329 ], dtype=float32)>,
 <tf.Variable 'decoder/batch_normalization_6/beta:0' shape=(16,) dtype=float32, numpy=
 array([-0.01454996, -0.06398325, -0.08782358, -0.01497211, -0.00589212,
        -0.04727145, -0.05022665, -0.10407288, -0.04063736, -0.02064782,
         0.01155298, -0.00582028, -0.15242551, -0.16460721, -0.12766598,
        -0.07940015], dtype=float32)>,
 <tf.Variable 'decoder/batch_normalization_6/moving_mean:0' shape=(16,) dtype=float32, numpy=
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       dtype=float32)>,
 <tf.Variable 'decoder/batch_normalization_6/moving_variance:0' shape=(16,) dtype=float32, numpy=
 array([1., 1., 1., 1., 1., 1., 1., 