# Relational Deep Reinforcement Learning

**Plan:**
1. Architecture
2. Agent
3. Environment
4. Training cycle

## Architecture

**Input: (b,n,n,3)** = (batch size, linear size, linear size, RGB channels). Note: the PyTorch code below uses the channels-first layout, i.e. (b,3,n,n).

**Extract entities: (b,n,n,3) -> (b, m, m, 2k)** 
* convolutional_layer1(kernel_size = (2,2), input_filters = 3, output_filters = k, stride = 1, pad = (1,1))
* convolutional_layer2(kernel_size = (2,2), input_filters = k, output_filters = 2k, stride = 1, pad = (1,1))

**Relational block: (b, m, m, 2k) -> (b,2k+2)**
* Positional Encoding: (b, m, m, 2k) -> (b, m^2, 2k+2)
* N Multi-Headed Attention blocks: (b, m^2, 2k+2) -> (b, m^2, 2k+2)
* Feature-wise max pooling: (b, m^2, 2k+2) -> (b, 2k+2)

**Actor output: (b,2k+2) -> (b,a)** [a = number of possible actions]
* 4 fully connected layers with ReLUs
* Single linear layer with softmax at the end

**Critic output: (b,2k+2) -> (b,1)** 
* 4 fully connected layers with ReLUs
* Single linear layer without activation function

In [8]:
import numpy as np
import torch 

import torch.nn as nn
import torch.nn.functional as F

In [22]:
class ExtractEntities(nn.Module):
    """Convolutional front-end that turns raw RGB pixels into entity vectors.

    Two ``Conv2d`` + ``ReLU`` stages are applied in sequence: the first maps
    ``k_in`` channels to ``k_out`` channels, the second maps ``k_out`` to
    ``2 * k_out``. Both convolutions share the same ``kernel_size``,
    ``stride`` and ``padding``.
    """

    def __init__(self, k_out, k_in=3, kernel_size=2, stride=1, padding=0):
        super().__init__()
        conv_kwargs = dict(kernel_size=kernel_size, stride=stride, padding=padding)
        self.net = nn.Sequential(
            nn.Conv2d(k_in, k_out, **conv_kwargs),
            nn.ReLU(),
            nn.Conv2d(k_out, 2 * k_out, **conv_kwargs),
            nn.ReLU(),
        )

    def forward(self, x):
        """Run ``x`` through the convolutional stack and return the feature map."""
        return self.net(x)

class PositionwiseFeedForward(nn.Module):
    """Two-layer MLP applied to the last dimension of the input.

    Computes ``w_2(dropout(relu(w_1(x))))``: the features are expanded from
    ``d_model`` to ``d_ff`` and then projected back to ``d_model``.
    """

    def __init__(self, d_model, d_ff, dropout=0.1):
        super().__init__()
        self.w_1 = nn.Linear(d_model, d_ff)
        self.w_2 = nn.Linear(d_ff, d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        hidden = F.relu(self.w_1(x))
        hidden = self.dropout(hidden)
        return self.w_2(hidden)
        
class EncoderBlock(nn.Module):
    """Encoder block: multi-head self-attention followed by a position-wise MLP.

    Each of the two sub-layers is wrapped in a residual ("add") connection
    followed by layer normalisation and dropout.
    """

    def __init__(self, n_features, n_heads, n_hidden=64, dropout=0.1):
        """
        Args:
          n_features: Number of input and output features. (d_model)
          n_heads: Number of attention heads in the Multi-Head Attention.
          n_hidden: Number of hidden units in the Feedforward (MLP) block. (d_ff)
          dropout: Dropout rate after the first layer of the MLP and the two skip connections.
        """
        super(EncoderBlock, self).__init__()
        # NOTE(review): a single LayerNorm is shared by both "add and norm"
        # steps; kept as-is so the parameter set (state_dict) does not change.
        self.norm = nn.LayerNorm(n_features)
        self.dropout = nn.Dropout(dropout)
        self.attn = nn.MultiheadAttention(n_features, n_heads, dropout)
        self.ff = PositionwiseFeedForward(n_features, n_hidden, dropout)

    def forward(self, x, mask=None):
        """
        Args:
          x of shape (max_seq_length, batch_size, n_features): Input sequences.
          mask of shape (batch_size, max_seq_length): Boolean tensor indicating which elements of the input
              sequences should be ignored.

        Returns:
          z of shape (max_seq_length, batch_size, n_features): Encoded input sequence.

        Note: All intermediate signals should be of shape (max_seq_length, batch_size, n_features).
        """
        # Self-attention: queries, keys and values are all the input itself.
        attn_output, _ = self.attn(x, x, x, key_padding_mask=mask)
        # First add-and-norm: residual connection around the attention step.
        x_norm = self.dropout(self.norm(attn_output + x))
        z = self.ff(x_norm)
        # Second add-and-norm: the residual term was previously missing here,
        # so the block output ignored everything but the MLP output.
        return self.dropout(self.norm(z + x_norm))

In [123]:
class PositionalEncoding(nn.Module):
    """Append 2D coordinates to a feature map and project every cell to ``n_features``.

    Args:
      in_dim: Number of channels of the incoming feature map.
      n_features: Size of the per-cell output vectors.
    """

    def __init__(self, in_dim, n_features):
        super(PositionalEncoding, self).__init__()
        # +2 for the two coordinate channels appended by ``add_encoding2D``.
        self.projection = nn.Linear(in_dim + 2, n_features)

    def forward(self, x):
        """
        Args:
          x of shape (batch_size, in_dim, x_ax, y_ax): Feature map.

        Returns:
          Tensor of shape (batch_size, x_ax * y_ax, n_features).
        """
        x = self.add_encoding2D(x)       # (b, in_dim + 2, x_ax, y_ax)
        x = x.flatten(start_dim=2)       # (b, in_dim + 2, x_ax * y_ax)
        return self.projection(x.transpose(2, 1))

    @staticmethod
    def add_encoding2D(x):
        """Concatenate two coordinate channels, each spanning [-1, 1], to ``x``.

        Args:
          x of shape (batch_size, channels, x_ax, y_ax): Feature map.

        Returns:
          Tensor of shape (batch_size, channels + 2, x_ax, y_ax). Channel
          ``channels`` varies along the ``x_ax`` dimension and channel
          ``channels + 1`` varies along the ``y_ax`` dimension.
        """
        batch_size, _, x_ax, y_ax = x.shape

        # Build the coordinates on the input's device/dtype so that the
        # concatenation also works for GPU or non-float32 feature maps.
        x_lin = torch.linspace(-1, 1, x_ax, device=x.device, dtype=x.dtype)
        y_lin = torch.linspace(-1, 1, y_ax, device=x.device, dtype=x.dtype)

        # torch.cat does not broadcast: the grids must match the batch size of
        # ``x`` (``expand`` creates a view, no data is copied).
        xx = x_lin.view(1, 1, x_ax, 1).expand(batch_size, 1, x_ax, y_ax)
        yy = y_lin.view(1, 1, 1, y_ax).expand(batch_size, 1, x_ax, y_ax)

        return torch.cat((x, xx, yy), dim=1)

In [121]:
# Random stand-in for a single RGB frame: (batch, channels, height, width)
x = torch.rand(1, 3, 12, 12)

# Convolutional pass: turn the frame into a grid of entity vectors
get_entities = ExtractEntities(k_out=12)
y = get_entities(x)
y.shape

torch.Size([1, 24, 10, 10])

In [122]:
# Positional encoding: append the two coordinate channels to the 24-channel
# entity map and project every grid cell to a 256-dimensional vector
pe = PositionalEncoding(in_dim=24, n_features=256)
z = pe(y)
z.shape

torch.Size([1, 24, 10, 10])
torch.Size([1, 1, 10, 10])
torch.Size([1, 1, 10, 10])
torch.Size([1, 26, 10, 10])


torch.Size([1, 100, 256])

In [124]:
# MHA
#
# EncoderBlock (through nn.MultiheadAttention) expects its input laid out as
# (seq_length, batch_size, n_features), whereas the positional encoding returns
# (batch_size, seq_length, n_features). Swap the first two axes on the way in
# and back on the way out; otherwise attention is computed across the batch
# axis instead of across the entities.
encoder = EncoderBlock(256, 2)
w = encoder(z.transpose(0, 1)).transpose(0, 1)
w.shape

torch.Size([1, 100, 256])

Next step: build the full encoder by stacking cloned encoder blocks. We will use 2 or 4 of them.


In [19]:
# Show the built-in help text for nn.MultiheadAttention
help(nn.MultiheadAttention)

Help on class MultiheadAttention in module torch.nn.modules.activation:

class MultiheadAttention(torch.nn.modules.module.Module)
 |  MultiheadAttention(embed_dim, num_heads, dropout=0.0, bias=True, add_bias_kv=False, add_zero_attn=False, kdim=None, vdim=None)
 |  
 |  Allows the model to jointly attend to information
 |  from different representation subspaces.
 |  See reference: Attention Is All You Need
 |  
 |  .. math::
 |      \text{MultiHead}(Q, K, V) = \text{Concat}(head_1,\dots,head_h)W^O
 |      \text{where} head_i = \text{Attention}(QW_i^Q, KW_i^K, VW_i^V)
 |  
 |  Args:
 |      embed_dim: total dimension of the model.
 |      num_heads: parallel attention heads.
 |      dropout: a Dropout layer on attn_output_weights. Default: 0.0.
 |      bias: add bias as module parameter. Default: True.
 |      add_bias_kv: add bias to the key and value sequences at dim=0.
 |      add_zero_attn: add a new batch of zeros to the key and
 |                     value sequences at dim=1.
 |  