🎒 Step 1: Replay Buffer Code Block

In [1]:
# 🎒 Replay Buffer - Experience Replay Memory

import random
from collections import deque
import numpy as np

class ReplayBuffer:
    """
    A fixed-size buffer that stores agent experiences for training the Q-network.

    Experiences are kept in a ``deque`` created with ``maxlen=capacity``, so once
    the buffer is full each new experience evicts the oldest one.

    Attributes:
        buffer (deque): A double-ended queue to store experience tuples.
        capacity (int): Maximum number of experiences the buffer can hold.
    """

    def __init__(self, capacity=100_000):
        """
        Initialize the replay buffer.

        Args:
            capacity (int): Maximum number of transitions to store in the buffer.
        """
        self.buffer = deque(maxlen=capacity)
        # Expose the documented attribute; it was described in the class
        # docstring but never assigned, so reading it raised AttributeError.
        self.capacity = capacity

    def push(self, state, action, reward, next_state, done):
        """
        Store a new experience in the buffer.

        Args:
            state (np.ndarray): The current state.
            action (int): The action taken.
            reward (float): The reward received.
            next_state (np.ndarray): The resulting next state.
            done (bool): Whether the episode has ended.
        """
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        """
        Sample a batch of experiences from the buffer, without replacement.

        Args:
            batch_size (int): The number of samples to return.

        Returns:
            tuple of np.ndarrays: Batched (states, actions, rewards, next_states, dones).
            Rewards are returned as float32 and dones as uint8.

        Raises:
            ValueError: If ``batch_size`` is not between 1 and the number of
                experiences currently stored.
        """
        available = len(self.buffer)
        # Fail early with an actionable message. Without this guard an empty
        # request dies on tuple unpacking and an oversized one surfaces a
        # generic error from random.sample (both were already ValueError).
        if batch_size <= 0 or batch_size > available:
            raise ValueError(
                f"batch_size must be between 1 and {available} "
                f"(the current buffer size), got {batch_size}"
            )

        batch = random.sample(self.buffer, batch_size)
        # Transpose a list of transitions into one tuple per field.
        states, actions, rewards, next_states, dones = zip(*batch)
        return (
            np.array(states),
            np.array(actions),
            np.array(rewards, dtype=np.float32),
            np.array(next_states),
            np.array(dones, dtype=np.uint8)
        )

    def __len__(self):
        """
        Return the current size of the buffer.

        Returns:
            int: Number of stored experiences.
        """
        return len(self.buffer)


In [2]:
# 🧪 Testing the ReplayBuffer functionality

# A deliberately tiny buffer keeps the smoke test easy to reason about
buffer = ReplayBuffer(capacity=10)

# Fill it with 5 synthetic transitions
for i in range(5):
    state = np.full(8, float(i))  # Fake 8-feature state (LunarLander-sized), every entry equal to i
    next_state = state + 1
    action, reward, done = i % 4, float(i), (i % 2 == 0)
    buffer.push(state, action, reward, next_state, done)

print(f"✅ Buffer size after pushing: {len(buffer)}")

# Draw 3 transitions and report the shape of every batched field
sampled = buffer.sample(3)
print("✅ Sampled batch shapes:")
shape_labels = (
    "States shape:",
    "Actions shape:",
    "Rewards shape:",
    "Next States shape:",
    "Dones shape:",
)
for label, batch_field in zip(shape_labels, sampled):
    print(label, batch_field.shape)


✅ Buffer size after pushing: 5
✅ Sampled batch shapes:
States shape: (3, 8)
Actions shape: (3,)
Rewards shape: (3,)
Next States shape: (3, 8)
Dones shape: (3,)


🧠 Step 2: Q-Network (Neural Net Brain)

In [3]:
# 🧠 Q-Network - The Deep Neural Network Approximator

import torch
import torch.nn as nn

class QNetwork(nn.Module):
    """
    A feedforward neural network that estimates Q-values for each action
    given a state input from the environment.

    The architecture is Linear -> ReLU -> Linear -> ReLU -> Linear, with both
    hidden layers sharing the same width.

    Attributes:
        model (nn.Sequential): The sequential network architecture.
    """

    def __init__(self, state_size, action_size, hidden_size=128):
        """
        Initialize the Q-network layers.

        Args:
            state_size (int): Dimension of the input state.
            action_size (int): Number of possible discrete actions.
            hidden_size (int): Width of the two hidden layers. Defaults to 128,
                the value that was previously hard-coded.
        """
        super().__init__()

        # Layer order and indices are kept as-is so existing state_dict keys
        # (model.0, model.2, model.4) remain valid.
        self.model = nn.Sequential(
            nn.Linear(state_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, action_size)
        )

    def forward(self, x):
        """
        Forward pass of the network.

        Args:
            x (torch.Tensor): The input state tensor; its last dimension must
                equal ``state_size``.

        Returns:
            torch.Tensor: The predicted Q-values for each action.
        """
        return self.model(x)


In [4]:
# 🧪 Testing the QNetwork with dummy input

# LunarLander-v2 dimensions: 8 state features, 4 discrete actions
state_size, action_size = 8, 4

net = QNetwork(state_size=state_size, action_size=action_size)

# A single random state with a leading batch dimension of 1
dummy_input = torch.randn((1, state_size))
output = net(dummy_input)

print("✅ Output Q-values:", output)
print("✅ Output shape:", output.shape)


✅ Output Q-values: tensor([[ 0.1017,  0.1722, -0.2195,  0.0700]], grad_fn=<AddmmBackward0>)
✅ Output shape: torch.Size([1, 4])
