# DQN

In this notebook, we want to implement DQN algorithm. Code mainly is from the RL course at Aalto university and the pytorch tutorial for DQN:
https://pytorch.org/tutorials/intermediate/reinforcement_q_learning.html

In [1]:
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import matplotlib.pyplot as plt

import gym

import random

from collections import namedtuple

from itertools import count

%matplotlib inline

In [2]:
import warnings
warnings.simplefilter("error")
warnings.simplefilter("ignore", UserWarning)

In [3]:
torch.manual_seed(1)
np.random.seed(1)

In [4]:
env_name = "CartPole-v0"
env = gym.make(env_name)
env.seed(1)

[1]

In [5]:
env.observation_space, env.action_space

(Box(4,), Discrete(2))

In [6]:
TARGET_UPDATE = 10
glie_a = 5
num_episodes = 10000
hidden = 256
gamma = 0.999
replay_buffer_size = 10000
batch_size = 128
eps_stop = 0.05

In [7]:
Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward', 'done'))

class ReplayMemory(object):
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        self.position = 0

    def push(self, *args):
        """Saves a transition."""
        if len(self.memory) < self.capacity:
            self.memory.append(None)
        self.memory[self.position] = Transition(*args)
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)

In [8]:
class DQN(nn.Module):
    def __init__(self, state_space_dim, action_space_dim, hidden=12):
        super(DQN, self).__init__()
        self.hidden = hidden
        self.fc1 = nn.Linear(state_space_dim, hidden)
        self.fc2 = nn.Linear(hidden, hidden//2)
        self.fc3 = nn.Linear(hidden//2, action_space_dim)

    def forward(self, x):
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        return x

In [25]:
class DQNAgent(object):
    def __init__(self, state_space, n_actions, replay_buffer_size=50000,
                 batch_size=32, hidden_size=12, gamma=0.98):
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.n_actions = n_actions
        self.state_space_dim = state_space
        self.policy_net = DQN(state_space, n_actions, hidden_size).to(self.device)
        self.target_net = DQN(state_space, n_actions, hidden_size).to(self.device)
        self.target_net.load_state_dict(self.policy_net.state_dict())
        self.target_net.eval()

        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=1e-3)
        self.memory = ReplayMemory(replay_buffer_size)
        self.batch_size = batch_size
        self.gamma = gamma

    def update_network(self, updates=1):
        for _ in range(updates):
            self._do_network_update()

    def _do_network_update(self):
        if len(self.memory) < self.batch_size:
            return
        transitions = self.memory.sample(self.batch_size)
        # Transpose the batch (see https://stackoverflow.com/a/19343/3343043 for
        # detailed explanation). This converts batch-array of Transitions
        # to Transition of batch-arrays.
        batch = Transition(*zip(*transitions))

        # Compute a mask of non-final states and concatenate the batch elements
        # (a final state would've been the one after which simulation ended)
        non_final_mask = 1 - torch.tensor(batch.done, dtype=torch.uint8)
        non_final_next_states = [s for nonfinal,s in zip(non_final_mask,
                                     batch.next_state) if nonfinal > 0]
        non_final_next_states = torch.stack(non_final_next_states).to(self.device)
        state_batch = torch.stack(batch.state).to(self.device)
        action_batch = torch.cat(batch.action).to(self.device)
        reward_batch = torch.cat(batch.reward).to(self.device)

        self.optimizer.zero_grad()
        # Compute Q(s_t, a) - the model computes Q(s_t), then we select the
        # columns of actions taken. These are the actions which would've been taken
        # for each batch state according to policy_net
        print("network_update - Policy", state_batch.shape, action_batch.shape)
        state_action_values = self.policy_net(state_batch).gather(1, action_batch)
        
        # Compute V(s_{t+1}) for all next states.
        # Expected values of actions for non_final_next_states are computed based
        # on the "older" target_net; selecting their best reward with max(1)[0].
        # This is merged based on the mask, such that we'll have either the expected
        # state value or 0 in case the state was final.
        # about detach(): https://discuss.pytorch.org/t/detach-no-grad-and-requires-grad/16915/7
        next_state_values = torch.zeros(self.batch_size).to(self.device)
        print("network_update - target", state_batch.shape, action_batch.shape)
        next_state_values[non_final_mask] = self.target_net(non_final_next_states).max(1)[0].detach()
        expected_state_action_values = reward_batch + self.gamma * next_state_values
        
        # Compute Huber loss
        loss = F.smooth_l1_loss(state_action_values.squeeze(),
                                expected_state_action_values)

        # Optimize the model
        loss.backward()
        for param in self.policy_net.parameters():
            param.grad.data.clamp_(-1e-1, 1e-1)
        self.optimizer.step()

    def get_action(self, state, epsilon=0.05):
        sample = random.random()
        if sample > epsilon:
            with torch.no_grad():
                state = torch.from_numpy(state).float().to(self.device)
                print("action", state.shape)
                q_values = self.policy_net(state).to(self.device)
                return torch.argmax(q_values).item()
        else:
            return random.randrange(self.n_actions)

    def update_target_network(self):
        self.target_net.load_state_dict(self.policy_net.state_dict())

    def store_transition(self, state, action, next_state, reward, done):
        action = torch.Tensor([[action]]).long()
        reward = torch.tensor([reward], dtype=torch.float32)
        next_state = torch.from_numpy(next_state).float()
        state = torch.from_numpy(state).float()
        self.memory.push(state, action, next_state, reward, done)


In [26]:
def plot_rewards(rewards):
    plt.figure(2)
    plt.clf()
    rewards_t = torch.tensor(rewards, dtype=torch.float)
    plt.title('Training...')
    plt.xlabel('Episode')
    plt.ylabel('Cumulative reward')
    plt.grid(True)
    plt.plot(rewards_t.numpy())
    # Take 100 episode averages and plot them too
    if len(rewards_t) >= 100:
        means = rewards_t.unfold(0, 100, 1).mean(1).view(-1)
        means = torch.cat((torch.zeros(99), means))
        plt.plot(means.numpy())


In [27]:
# Get number of actions from gym action space
n_actions = env.action_space.n
state_space_dim = env.observation_space.shape[0]

agent = DQNAgent(state_space_dim, n_actions, replay_buffer_size, batch_size,
                  hidden, gamma)

print('n_actions', n_actions)
print('state_space_dim', state_space_dim)
print('agent policy net: ', agent.policy_net)

n_actions 2
state_space_dim 4
agent policy net:  DQN(
  (fc1): Linear(in_features=4, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=2, bias=True)
)


In [29]:
# Training loop
cumulative_rewards = []
for ep in range(num_episodes):
    # Initialize the environment and state
    print('training started ...')
    state = env.reset()
    done = False
    eps = max(eps_stop , glie_a/(glie_a+ep))
    cum_reward = 0
    while not done:
        # Select and perform an action
        action = agent.get_action(state, eps)
        next_state, reward, done, _ = env.step(action)
        cum_reward += reward

        agent.store_transition(state, action, next_state, reward, done)
        agent.update_network()

        # Move to the next state
        state = next_state

    cumulative_rewards.append(cum_reward)

    print("episode: %d: reward: %6.2f, mean_100: %6.2f, epsilon: %.2f" % (
        ep, cum_reward, np.mean(cumulative_rewards[-100:]), eps))

    if np.mean(cumulative_rewards[-100:]) > 195.0:
        print("Solved in ep : {} and break".format(ep))
        break

    # Update the target network, copying all weights and biases in DQN
    if ep % TARGET_UPDATE == 0:
        agent.update_target_network()


print('Complete')


training started ...
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
episode: 6: reward:  10.00, mean_100:  23.57, epsilon: 0.45
training started ...
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_upda

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
episode: 17: reward:  78.00, mean_100:  44.17, epsilon: 0.23
training started ...
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_upd

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
episode: 20: reward:  82.00, mean_100:  56.52, epsilon: 0.20
training started ...
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy tor

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
episode: 22: reward: 127.00, mean_100:  61.96, epsilon: 0.19
training started ...
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_upd

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action 

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action 

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
episode: 29: reward: 158.00, mean_100:  78.87, epsilon: 0.15
training started ...
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Siz

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
episode: 144: reward: 181.00, mean_100: 120.46, epsilon: 0.05
training started ...
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Si

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
episode: 148: reward: 103.00, mean_100: 121.37, epsilon: 0.05
training started ...
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
episode: 177: reward: 180.00, mean_100: 134.90, epsilon: 0.05
training started ...
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Si

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
episode: 179: reward: 177.00, mean_100: 136.02, epsilon: 0.05
training started ...
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Si

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
episode: 185: reward: 200.00, mean_100: 140.30, epsilon: 0.05
training started ...
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_up

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy tor

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

episode: 200: reward: 200.00, mean_100: 151.36, epsilon: 0.05
training started ...
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action 

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
episode: 207: reward: 200.00, mean_100: 157.35, epsilon: 0.05
training started ...
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Si

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy tor

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
episode: 224: reward: 200.00, mean_100: 168.30, epsilon: 0.05
training started ...
action tor

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
episode: 225: reward: 200.00, mean_100: 168.48, epsilon: 0.05
training started ...
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Si

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

episode: 229: reward: 200.00, mean_100: 170.15, epsilon: 0.05
training started ...
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Si

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
episode: 234: reward: 200.00, mean_100: 170.84, epsilon: 0.05
training started ...
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_up

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
episode: 236: reward: 200.00, mean_100: 172.12, epsilon: 0.05
training started ...
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Si

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
episode: 249: reward: 158.00, mean_100: 178.59, epsilon: 0.05
training started ...
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Si

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy tor

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
episode: 290: reward:  42.00, mean_100: 180.97, epsilon: 0.05
training started ...
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
episode: 294: reward: 145.00, mean_100: 180.25, epsilon: 0.05
training started ...
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
episode: 316: reward: 137.00, mean_100: 174.17, epsilon: 0.05
training started ...
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Si

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
episode: 317: reward: 200.00, mean_100: 174.17, epsilon: 0.05
training started ...
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action tor

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
episode: 323: reward: 145.0

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
episode: 348: reward: 167.00, mean_100: 152.61, epsilon: 0.05
training started ...
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Si

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
acti

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target tor

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
episode: 358: reward: 152.00, mean_100: 150.99, epsilon: 0.05
training started ...
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action tor

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) to

action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
action torch.Size([4])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
network_update - target torch.Size([128, 4]) torch.Size([128, 1])
network_update - Policy torch.Size([128, 4]) torch.Size([128, 1])
netw

KeyboardInterrupt: 

In [None]:
plot_rewards(cumulative_rewards)