In [0]:
%load_ext autoreload
%autoreload 2

from networks.actor import Actor
from networks.critic import Critic
import torch

# Actor: takes a single input (observation and goal concatenated)
# Critic: takes two inputs (the concatenated observation + goal, and the action)

# Dummy environment parameters (matching Fetch environments)
env_params = {
    # Size of observation vector
    "obs_dim": 25,
    # Size of goal vector
    "goal_dim": 3,
    # Action space dimension
    "action_dim": 4,
    # Maximum action magnitude
    "act_limit": 1.0,
}

"""
Actor Network
--------------
Input size:
    Observation: 25
    Goal: 3
    Total input size: 28
Output:
    Action vector → shape [10, 4]

[Input (28)] → FC1 (256) → ReLU
             → FC2 (256) → ReLU
             → Output (4) → tanh → scale by act_limit (1.0)
###################################
Critic Network
-------------
Input size:
    Observation: 25
    Goal: 3
    Action: 4
    Total input size: 28 + 4 = 32

Architecture:
[Input (32)] → FC1 (256) → ReLU
             → FC2 (256) → ReLU
             → Output (1) → Q-value
Output:
    Q-value → shape [10, 1] (1 scalar per sample)

"""
"""
| Item          | Shape      | Why                               |
| ------------- | ---------- | --------------------------------- |
| `obs`         | `[10, 25]` | 10 samples, 25 features each      |
| `goal`        | `[10, 3]`  | 10 samples, 3 goal features each  |
| `state_input` | `[10, 28]` | Concatenated obs + goal           |
| `action`      | `[10, 4]`  | Actor outputs 4D action           |
| `q_values`    | `[10, 1]`  | Critic outputs scalar Q per input |

"""

# Smoke test: build the actor and critic, push one random batch through both,
# and print the resulting shapes, sample values and module layouts.

# Hidden-layer sizes handed to both network constructors.
hidden_layers = [256, 256]
# Number of samples in the simulated batch.
batch_size = 10

# Create Actor and Critic instances.
# NOTE: the statements that draw random numbers (network construction, then
# obs, goal, action_input) are kept in this order so seeded runs stay reproducible.
actor = Actor(env_params, her=True, hidden_layers=hidden_layers)
critic = Critic(env_params, her=True, hidden_layers=hidden_layers)

# Random observation and goal vectors
obs = torch.randn(batch_size, env_params['obs_dim'])    # → shape: [10, 25]
goal = torch.randn(batch_size, env_params['goal_dim'])  # → shape: [10, 3]

# Concatenate observation and goal along the feature axis (since her=True)
state_input = torch.cat([obs, goal], dim=1)  # [10, 25] + [10, 3] → [10, 28]

# Random actions, used below to exercise the critic independently of the actor
action_input = torch.randn(batch_size, env_params['action_dim'])  # → shape: [10, 4]

# Test Actor forward pass: one action vector of size 4 per input row
actions = actor(state_input)
print("Actor output shape:", actions.shape)  # Should be [10, 4]

# Test Critic forward pass on the actor's own actions:
# state_input → [10, 28] and actions → [10, 4] give one scalar Q-value per sample
q_values = critic(state_input, actions)
print("Critic output shape:", q_values.shape)  # Should be [10, 1]

# Test Critic forward pass on arbitrary actions. `action_input` was previously
# created for this purpose but never passed to the critic.
q_values_random = critic(state_input, action_input)
print("Critic output shape (random actions):", q_values_random.shape)  # Should be [10, 1]

# Inspect a sample output
print("Sample Actor output:\n", actions[0])
print("Sample Q-value:\n", q_values[0])

# Module layouts as rendered by each network's repr
print("Actor architecture:\n", actor)
print("\nCritic architecture:\n", critic)

In [0]:
# Print both networks with one routine: a banner, the numbered hidden layers,
# then the output head. The head is looked up by attribute name inside the
# loop so it is only read after that network's hidden layers were printed.
for banner, net, head_attr in (
    ("=== Actor Architecture ===", actor, "action_out"),
    ("\n=== Critic Architecture ===", critic, "q_out"),
):
    print(banner)
    for position, module in enumerate(net.hidden_layers, start=1):
        print(f"Hidden Layer {position}: {module}")
    print(f"Output Layer: {getattr(net, head_attr)}")

In [0]:
%pip install torchsummary


In [0]:
from torchsummary import summary
import torch.nn as nn

# In the torchsummary tables, -1 in an output shape means "this dimension can
# be any value" — it is the batch size.

# Actor input width: observation features + goal features (25 + 3 = 28).
actor_input_dim = env_params['obs_dim'] + env_params['goal_dim']

# torchsummary defaults to device="cuda" and, when CUDA is available, builds
# its probe input as a CUDA tensor. Pass the device the actor's parameters
# actually live on so a CPU-resident model does not fail on GPU machines.
actor_device = next(actor.parameters()).device.type

print("\n--- Actor Summary ---")
summary(actor, input_size=(actor_input_dim,), device=actor_device)

# Wrap critic
class CriticWrapper(nn.Module):
    """Adapter that exposes a two-input critic through a single-tensor forward.

    The wrapped critic is called as ``critic(state, action)``. This wrapper
    accepts one 2-D tensor whose columns are the state features followed by
    the action features, splits it, and forwards both parts.

    Args:
        critic: Callable (typically an ``nn.Module``) invoked as
            ``critic(state, action)``.
        state_dim: Number of leading columns that form the state part.
            Defaults to 28, the split used when this wrapper was hard-coded.
    """

    def __init__(self, critic, state_dim=28):
        super().__init__()
        self.critic = critic
        self.state_dim = state_dim

    def forward(self, x):
        """Split ``x`` column-wise at ``state_dim`` and return ``critic(state, action)``."""
        state, action = x[:, :self.state_dim], x[:, self.state_dim:]
        return self.critic(state, action)

# Single-input adapter around the critic, used for the summary call below.
wrapped_critic = CriticWrapper(critic)

# torchsummary defaults to device="cuda" and, when CUDA is available, builds
# its probe input as a CUDA tensor. Use the device the critic's parameters
# actually live on so a CPU-resident model does not fail on GPU machines.
critic_device = next(wrapped_critic.parameters()).device.type

print("\n--- Critic Summary ---")
# 32 input columns: the first 28 are split off as the state and the
# remaining 4 as the action inside CriticWrapper.forward.
summary(wrapped_critic, input_size=(32,), device=critic_device)

# Actor input: (28,)  → 25 obs + 3 goal
# | Layer     | Output Shape | Parameters | Description                              |
# | --------- | ------------ | ---------- | ---------------------------------------- |
# | Linear-1  | `[-1, 256]`  | 7,424      | (28 + 1 bias) × 256                      |
# | Linear-2  | `[-1, 256]`  | 65,792     | 256 × 256 + 256                          |
# | Linear-3  | `[-1, 4]`    | 1,028      | 256 × 4 + 4                              |
# | **Total** |              | **74,244** | ✓ This is correct for your 3-layer actor |

# Critic input: (32,) → 25 obs + 3 goal + 4 action
# | Layer        | Output Shape | Parameters | Description                                 |
# | ------------ | ------------ | ---------- | ------------------------------------------- |
# | Linear-1     | `[-1, 256]`  | 8,448      | (32 + 1 bias) × 256                         |
# | Linear-2     | `[-1, 256]`  | 65,792     | 256 × 256 + 256                             |
# | Linear-3     | `[-1, 1]`    | 257        | 256 × 1 + 1                                 |
# | **Critic-4** | `[-1, 1]`    | 0          | The `Critic` module wrapper — has no params |
# | **Total**    |              | **74,497** | ✓ Matches exactly what you designed         |
