In [1]:
import sys
sys.path.insert(0, "../")

import numpy as np
import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Categorical

In [2]:
from pysc2.env import sc2_env
from pysc2.lib import actions as sc_actions
from SC_Utils.game_utils import IMPALA_ObsProcesser, FullObsProcesser

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


In [3]:
def init_game(game_params, map_name='MoveToBeacon', step_multiplier=8, **kwargs):
    """
    Create a single-player SC2Env for the requested map.

    Parameters
    ----------
    game_params: dict
        Keyword arguments forwarded to sc2_env.parse_agent_interface_format
    map_name: str
        Name of the map to load
    step_multiplier: int
        Forwarded to SC2Env as step_mul
    **kwargs:
        Extra keyword arguments forwarded to SC2Env

    Returns
    -------
    env: the sc2_env.SC2Env instance
    """
    # NamedTuple [race, agent_name]; Race(1) is terran
    player = sc2_env.Agent(sc2_env.Race(1), "Testv0")
    # AgentInterfaceFormat instance
    interface_format = sc2_env.parse_agent_interface_format(**game_params)

    # players and agent_interface_format are lists even for a single player
    return sc2_env.SC2Env(
        map_name=map_name,
        players=[player],
        game_steps_per_episode=0,
        step_mul=step_multiplier,
        agent_interface_format=[interface_format],
        **kwargs
    )

In [4]:
# Environment parameters
# Screen and minimap feature layers are requested at the same resolution
RESOLUTION = 32
game_params = dict(feature_screen=RESOLUTION, feature_minimap=RESOLUTION, action_space="FEATURES") 
# Candidate map names; one of them is selected by index below
game_names = ['MoveToBeacon','CollectMineralShards','DefeatRoaches','FindAndDefeatZerglings',
              'DefeatZerglingsAndBanelings','CollectMineralsAndGas','BuildMarines']
map_name = game_names[1]  # 'CollectMineralShards'
# Keyword arguments reused for every observation processer built in this notebook
obs_proc_params = {'select_all':True}
op = FullObsProcesser(**obs_proc_params)
screen_channels, minimap_channels, in_player = op.get_n_channels()
# Total channels if screen and minimap are stacked; in_channels is reassigned further down
in_channels = screen_channels + minimap_channels 

"""
# A2C params
spatial_model = net.FullyConvPlayerAndSpatial
nonspatial_model = net.FullyConvNonSpatial
# Internal features, passed inside a dictionary
conv_channels = flags.conv_channels #32
player_features = flags.player_features #16
# Exposed features, passed outside of a dictionary
n_channels = conv_channels + player_features #48
n_features = flags.n_features #256

spatial_dict = {"in_channels":in_channels, 'in_player':in_player, 
                'conv_channels':conv_channels, 'player_features':player_features}
nonspatial_dict = {'resolution':RESOLUTION, 'kernel_size':3, 'stride':2, 'n_channels':n_channels}

HPs = dict(spatial_model=spatial_model, nonspatial_model=nonspatial_model,
       n_features=n_features, n_channels=n_channels, action_names=flags.action_names,
       spatial_dict=spatial_dict, nonspatial_dict=nonspatial_dict)
game_params['HPs'] = HPs
"""
print()




In [5]:
env = init_game(game_params, map_name)

In [6]:
obs = env.reset()

In [10]:
# Alternative actions kept for manual experimentation (uncomment one at a time)
#action = sc_actions.FunctionCall(sc_actions.FUNCTIONS.no_op.id, [])
action = sc_actions.FunctionCall(sc_actions.FUNCTIONS.select_army.id, [[0]]) 
#action = sc_actions.FunctionCall(sc_actions.FUNCTIONS.Attack_screen.id, [[0],[1,1]])
# actions are passed as a list, here with a single entry
obs = env.step(actions=[action])

In [11]:
action_names = ['no_op', 'move_camera', 'select_point', 'select_rect', 'select_idle_worker', 'select_army', 
              'Attack_screen','Attack_minimap', 'Build_Barracks_screen', 'Build_CommandCenter_screen',
              'Build_Refinery_screen', 'Build_SupplyDepot_screen','Harvest_Gather_SCV_screen', 
              'Harvest_Return_SCV_quick', 'HoldPosition_quick', 'Move_screen', 'Move_minimap',
              'Rally_Workers_screen', 'Rally_Workers_minimap','Train_Marine_quick', 'Train_SCV_quick']

In [12]:
# ids of the selected actions, in the same order as action_names
action_ids = [sc_actions.FUNCTIONS[name].id for name in action_names]
action_table = np.array(action_ids)

In [13]:
IMP_op = IMPALA_ObsProcesser(action_table, **obs_proc_params)

# 1. Last Action as additional input

Note: actions that are invalid or equivalent to a no-op are not recorded by the environment, so in those cases `last_actions` will be an empty list `[]`.

In [14]:
# 'last_actions' can be empty; in that case fall back to 0
last_action = obs[0].observation['last_actions']
print(last_action)
last_action = last_action[0] if len(last_action) > 0 else 0
last_action

[7]


7

In [15]:
# Position of the last action id inside the action table (first match);
# the [0][0] indexing raises IndexError if the id is not in the table
last_action_idx = np.where(IMP_op.action_table == last_action)[0][0]
print(IMP_op.action_table)
# sanity check: looking the index up again gives back the original id
IMP_op.action_table[last_action_idx]

[  0   1   2   3   6   7  12  13  42  44  79  91 268 273 274 331 332 343
 344 477 490]


7

We also need somehow to embed the last action in a meaningful way (I guess that an embedding layer with embedding dim of 10 would do). After that we can simply concatenate player\_info with last\_action.

The model already has the information about the action space, so we just need to pass the embed\_dim variable (we can actually keep it constant to 10 for simplicity).

# 2. Screen / Minimap / Categorical action 

Task: tile a binary mask to screen and minimap with ones if respectively last action was acting on the screen or on the minimap, with zeros otherwise.

How to understand if an action is for screen or minimap? At the moment I just have a spatial vs categorical distinction at the argument level, but nothing screen vs minimap vs other at the main action level.

It makes sense to build a look-up table before the beginning of the training to answer this question as fast as possible during runtime.

In [16]:
# First entry of the action spec, split into its two tables:
# index 1 is used below as the table of actions, index 0 as the table of their arguments
# (meaning inferred from the variable names — TODO confirm against the pysc2 docs)
all_actions = env.action_spec()[0][1]
all_arguments = env.action_spec()[0][0]

In [17]:
def check_if_screen(sc_env_action, screen=True, action_spec=None):
    """
    Tell whether an action takes at least one screen (or minimap) argument.

    TODO: turn this function into a method of some class that has access to the
    action specs (could be the wrapped Environment class, with self.env instead of env).

    Parameters
    ----------
    sc_env_action: int
        Index of the action inside the actions table of the action spec
    screen: bool
        If True look for arguments whose name contains 'screen',
        otherwise for arguments whose name contains 'minimap'
    action_spec: optional
        Object laid out like the return value of env.action_spec(), i.e.
        action_spec[0][0] is the arguments table and action_spec[0][1] the
        actions table. If None, the global `env` is queried.

    Returns
    -------
    numpy bool, True if any argument name of the action contains the keyword
    (False for actions without arguments)
    """
    # Query the environment once per call instead of once per table
    if action_spec is None:
        action_spec = env.action_spec()
    all_arguments, all_actions = action_spec[0][0], action_spec[0][1]

    keyword = 'screen' if screen else 'minimap'
    args = all_actions[sc_env_action].args
    names = [all_arguments[arg.id].name for arg in args]
    return np.any([keyword in n for n in names])

In [18]:
check_if_screen(last_action)

False

In [19]:
# one boolean per entry of the action table: does the action take a screen argument?
screen_mask = [check_if_screen(action_id) for action_id in IMP_op.action_table]
screen_mask

[False,
 False,
 True,
 True,
 False,
 False,
 True,
 False,
 True,
 True,
 True,
 True,
 True,
 False,
 False,
 True,
 False,
 True,
 False,
 False,
 False]

In [20]:
# one boolean per entry of the action table: does the action take a minimap argument?
minimap_mask = [check_if_screen(action_id, False) for action_id in IMP_op.action_table]
minimap_mask

[False,
 True,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 True,
 False,
 False]

In [21]:
state_dict, names = IMP_op.get_state(obs)

In [22]:
state_dict.keys()

dict_keys(['screen_layers', 'minimap_layers', 'player_features'])

In [23]:
state_dict['screen_layers'].shape

(26, 32, 32)

In [24]:
state_dict['minimap_layers'].shape

(12, 32, 32)

In [34]:
state_dict['player_features'].shape

(8,)

In [25]:
# simple access during run time
screen_binary_mask = np.array([screen_mask[last_action_idx]])
screen_binary_mask2D = np.tile(screen_binary_mask, [1,32,32])
state_dict['screen_layers'] = np.concatenate([state_dict['screen_layers'], screen_binary_mask2D])
state_dict['screen_layers'].shape

(27, 32, 32)

And of course same thing for minimap.

# 3. Spatial input processing

*Spatially encoded inputs (minimap and screen) are tiled with binary masks denoting
whether the previous action constituted a screen- or minimap-related action. These tensors are then fed to
independent residual convolutional blocks, each consisting of one convolutional layer (4 × 4 kernels and stride
2) followed by a residual block with 2 convolutional layers (3 × 3 kernels and stride 1), which process and
downsample the inputs to [8 × 8 × #channels_1] outputs. These tensors are concatenated along the depth
dimension to form a singular spatial input (inputs_3D).*

Differences with previous implementation: 
1. First process them, then merge them

In [40]:
class ResidualConvolutional(nn.Module):
    """
    Residual unit made of two convolutions with pre-activations:
    ReLU -> Conv2d -> ReLU -> Conv2d, whose result is added to the input.

    Parameters
    ----------
    res: int
        Input resolution (accepted for interface compatibility, not used)
    n_channels: int
        Number of input and output channels
    hidden_channels: int
        Number of channels between the two convolutions
    kernel_size: int
        Odd kernel size shared by both convolutions
    """

    def __init__(self, res, n_channels, hidden_channels=12, kernel_size=3):
        super().__init__()

        # only odd kernels can be padded so that the resolution is preserved
        assert (kernel_size - 1) % 2 == 0, 'Provide odd kernel size to use this layer'
        same_padding = (kernel_size - 1) // 2

        # pre-activations as in Identity Mappings in Deep Residual Networks https://arxiv.org/abs/1603.05027
        layers = [
            nn.ReLU(),
            nn.Conv2d(n_channels, hidden_channels, kernel_size, stride=1, padding=same_padding),
            nn.ReLU(),
            nn.Conv2d(hidden_channels, n_channels, kernel_size, stride=1, padding=same_padding),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return net(x) + x, with the same shape as x."""
        residual = self.net(x)
        return residual + x

In [41]:
class ConvBlock(nn.Module):
    """
    Convolutional layer followed by a ResidualConvolutional block.

    Parameters
    ----------
    res: int
        Input resolution, used to compute the resolution after the first convolution
    in_channels: int
        Number of input channels
    out_channels: int
        Number of output channels
    kernel_size, stride, padding: int
        Parameters of the first convolution
    """
    def __init__(self, res, in_channels, out_channels, kernel_size=4, stride=2, padding=1):
        super().__init__()
        # resolution of the feature maps after the first convolution
        new_res = (res - kernel_size + 2*padding)//stride + 1
        first_conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding)
        residual = ResidualConvolutional(new_res, out_channels, kernel_size=3)
        self.net = nn.Sequential(first_conv, residual)

    def forward(self, x):
        """Apply the convolution and then the residual block to x."""
        out = self.net(x)
        return out

In [99]:
res = 32
# one extra channel for the binary mask concatenated to the screen layers
in_channels = screen_channels + 1
out_channels = 32
# NOTE: stride=1 with the default kernel_size=4 and padding=1 gives an output
# resolution of (32 - 4 + 2)//1 + 1 = 31, i.e. no downsampling by 2
conv_block = ConvBlock(res, in_channels, out_channels, stride=1)

In [100]:
screen_tensor = torch.tensor(state_dict['screen_layers']).float().unsqueeze(0)
#screen_tensor = torch.rand((1,in_channels, res, res))
x_screen = conv_block(screen_tensor)

In [101]:
x_screen.shape

torch.Size([1, 32, 31, 31])

In [102]:
kernel_size = 5
padding = 2
stride = 1
res = 32
new_res = (res - kernel_size + 2*padding)//stride + 1
new_res

32

So basically we will have 2 convolutional blocks, one for the minimap and one for the screen. As a shortcut we could merge the two inputs and use a single convolutional block, but keeping them separate probably makes better use of the domain knowledge (i.e. we don't treat spatial information at two different scales as if it came from the same source).

# 4. Variable dimensionality recap

**screen**: tiling binary mask and adding batch dim: (1, screen_channels + 1, res, res) <br>
**minimap**: tiling binary mask and adding batch dim: (1, minimap_channels + 1, res, res) <br>
**player**:  adding batch dim: (1, in_player) <br>
**last_action**:  adding batch dim: (1, 1) <br>

### After state encoding:

**inputs_3D**: (1, #channels_1, new_res, new_res) <br>
with `new_res = (res - kernel_size + 2*padding)//stride + 1` and `#channels_1 = out_channels*2` (default 64 and 32 respectively)

**inputs_2D**: (1, in_player+embed_dim) -> (1,128) -> ReLU -> (1, 64) <br>
with default value of embed_dim equal to 10.

### After memory processing:
Note that inputs_3D are used as input to the Conv2D LSTM and not inputs_2D! Also call the output outputs_3D.

Conv2D LSTM: kernel size 3x3, stride 1, (padding of 1 to keep resolution constant), #output_channels 96

**outputs_3D**: (1, #output_channels, new_res, new_res) (same spatial resolution)

### Main processing (control part for the relational processing)

Input: outputs_3D

2 flows:

SPATIAL: 12-layer deep residual model ( 4 blocks of 3 convolutional layers each )
   - first: kernel 4x4, stride 1 (?)
   - second and third: kernel 3x3, stride 1 
   - "interleaved with ReLU activations and skip-connections" (I don't know if after every layer or every block; <br> ReLUs make sense after each layer, skip connections after every block maybe) 
    
Since the output should have the same shape as the relational-spatial outputs, i.e. [8, 8, #channels_2], and inputs_3D already have that spatial resolution in the paper, I would replace 8 with a more generic new_res and deduce that the whole spatial architecture uses padding so that the resolution remains unchanged. #channels_2 is not specified any further (we can keep it the same as #output_channels for simplicity).

NON-SPATIAL: flattened (all three dimensions I guess) and passed to a 2-layer MLP (512 units per layer, ReLU activations) to produce what we refer to above as relational-nonspatial <br>
(1, 64 x #channels2) -> (1, 512) -> ReLU -> (1, 512) -> ReLU

### Output processing

inputs 2D and relational-nonspatial are concatenated to form a set of shared features. <br>
Shared features used to produce log probs and V. <br>
shared features: 
- (1, 64 + 512) -> (1, 256) -> ReLU -> (1, #actions) for logits
- (1, 64 + 512) -> (1, 256) -> ReLU -> (1, 1) for value

Actions are sampled using computed policy logits and embedded into a 16 dimensional vector. 

This embedding is used to condition shared features and generate logits for non-spatial arguments (Args) through independent linear combinations (one for each argument). [Basically we concatenate the action to shared features and then pass it through a 2-layers MLP?]

Finally, spatial arguments (Args_{x,y}) are obtained by first deconvolving relational-spatial to [32 × 32 × #channels_3] tensors using Conv2DTranspose layers, conditioned by tiling the action embedding along the depth dimension and passed to 1 × 1 × 1 convolution layers (one for each spatial argument). 
1 x 1 x 1 means a Conv2d with kernel size of 1 and output channels of 1.

#channels_3 = 16 <br>
Conv2DTranspose: kernel size 4x4, stride 2 (does it work?)


# Conv2D_LSTM layer
From https://github.com/ndrplz/ConvLSTM_pytorch

In [47]:
from ConvLSTM_pytorch.convlstm import ConvLSTM

In [89]:
# ConvLSTM fed with the output channels of the convolutional block
input_channels = out_channels
# in case of a single layer
num_layers = 1
kernel_size = 3
# both lists need one entry per layer
hidden_channels = [96] * num_layers # not sure about this
kernel = [(kernel_size, kernel_size)] * num_layers # pay attention to this
conv_lstm = ConvLSTM(
    input_channels,
    hidden_channels,
    kernel,
    num_layers,
    batch_first=False,
    bias=True,
    return_all_layers=True,
)

In [90]:
#input_test1 = torch.rand(1, 1, input_channels, new_res, new_res)
input_test2 = torch.rand(5, 10, input_channels, new_res, new_res)

In [91]:
# input must be 5d (t, b, channels, w, h) or (b, t, channels, w, h) 
layer_output_list, last_state_list = conv_lstm(input_test2)

In [77]:
assert len(layer_output_list) == num_layers, "len(layer_output_list) is %d"%len(layer_output_list)
layer_output_list[0].shape

torch.Size([5, 10, 96, 16, 16])

In [78]:
assert len(last_state_list) == num_layers, "len(last_state_list) is %d"%len(last_state_list)
assert len(last_state_list[0]) == 2, "len(last_state_list[0]) is %d"%len(last_state_list[0])
assert last_state_list[0][0].shape == last_state_list[0][1].shape, 'h and c have different shapes'
assert torch.all(last_state_list[0][0] == layer_output_list[0][:,-1,...]), 'they both should be last h of the first layer'
last_state_list[0][0].shape

torch.Size([5, 96, 16, 16])

**layer_output_list**: <br>
[(b,t,c,w,h), ..., (b,t,c,w,h)] <br>
List length equal to number of layers

**last_state_list**: <br>
[[h,c], ..., [h,c]] <br>
with both c and h of shape (b,c,w,h) - there is no time dimension in the hidden and cell states (because it is understood that they refer to the last timestep)

In [93]:
# looping one step at the time and using the previous hidden state as new state of the lstm
T = 10
hidden_states = None
for t in range(T):
    layer_output_list, hidden_states = conv_lstm(input_test2, hidden_states)
out = layer_output_list[-1]
out.shape

torch.Size([10, 5, 96, 16, 16])

After this step it would make sense (maybe) to merge batch and time dimensions (I'm thinking about the learner).

In case of the actors, we just have a batch dimension of 1, so we will add a fake time dimension in front of it (or as second dimension, in a coherent way with the tensors coming out of the buffers that will be used by the learner) and we will shrink it again to 4d afterwards.

Just use something like:

out = out.view((-1,*out.shape[2:]))

In [95]:
# Swap the first two dimensions, then merge them into a single leading dimension.
# reshape is used because the transposed tensor is generally not contiguous.
# NOTE: `out` is overwritten, so re-running this cell alone does not reproduce the result
out = out.transpose(1,0).reshape((-1,*out.shape[2:]))
out.shape

torch.Size([50, 96, 16, 16])

Since we're working with time first and then batch but we receive the output of the lstm as batch first, we need to permute it again before collapsing the two dimensions.

# Main processing

No idea how to make a 4x4 convolution without losing resolution... use 5x5 with padding of 2 in the meantime.

Also I use a layer skip-connection, since it's not clear if they're using it for every layer or every block.

**How can they decide that the output is going to have 32 channels if it is a residual block?**

At the moment residual layers are lacking BatchNormalization (not possible) and LayerNormalization (possible but not in the original implementation)

In [104]:
class ResidualConvLayer(nn.Module):
    """
    Single pre-activated convolution with a skip connection: x + Conv2d(ReLU(x)).

    Parameters
    ----------
    res: int
        Input resolution (accepted for interface compatibility, not used)
    n_channels: int
        Number of input and output channels
    kernel_size: int
        Odd kernel size; padding is chosen so that the resolution is preserved
    """

    def __init__(self, res, n_channels, kernel_size=3):
        super().__init__()

        # only odd kernels can be padded so that the resolution is preserved
        assert (kernel_size - 1) % 2 == 0, 'Provide odd kernel size to use this layer'
        same_padding = (kernel_size - 1) // 2

        # pre-activations as in Identity Mappings in Deep Residual Networks https://arxiv.org/abs/1603.05027
        conv = nn.Conv2d(n_channels, n_channels, kernel_size, stride=1, padding=same_padding)
        self.net = nn.Sequential(nn.ReLU(), conv)

    def forward(self, x):
        """Return net(x) + x, with the same shape as x."""
        residual = self.net(x)
        return residual + x

In [106]:
class ResidualConvBlock(nn.Module):
    """
    Stack of three ResidualConvLayer with kernel sizes 5, 3 and 3.

    Parameters
    ----------
    in_channels: int
        Number of channels, kept constant through the block
    res: int
        Input resolution, forwarded to every layer
    """
    def __init__(self, in_channels, res):
        super().__init__()
        # pre-activations as in Identity Mappings in Deep Residual Networks https://arxiv.org/abs/1603.05027
        layers = [ResidualConvLayer(res, in_channels, kernel_size=k) for k in (5, 3, 3)]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the three residual layers in sequence."""
        out = self.net(x)
        return out

In [107]:
class DeepResidualBlock(nn.Module):
    """
    Sequence of n_blocks ResidualConvBlock, all with the same channels and resolution.

    Parameters
    ----------
    in_channels: int
        Number of channels, kept constant through the whole model
    res: int
        Input resolution, forwarded to every block
    n_blocks: int
        Number of ResidualConvBlock to stack
    """
    def __init__(self, in_channels, res, n_blocks=3):
        super().__init__()
        blocks = []
        for _ in range(n_blocks):
            blocks.append(ResidualConvBlock(in_channels, res))
        self.net = nn.Sequential(*blocks)

    def forward(self, x):
        """Apply all the residual blocks in sequence."""
        out = self.net(x)
        return out

In [109]:
# we got 96 and 16, we should have 32 and 8 somehow
deep_residual_spatial = DeepResidualBlock(in_channels=hidden_channels[-1], res=new_res) 

In [110]:
spatial_features = deep_residual_spatial(out)
spatial_features.shape

In [112]:
class NonSpatialBlock(nn.Module):
    """
    Flatten spatial features and process them with a 2-layer MLP
    (512 units per layer, ReLU activations).

    Parameters
    ----------
    in_channels: int
        Number of channels of the input
    res: int
        Spatial resolution of the input (assumed square)
    """
    def __init__(self, in_channels, res):
        super(NonSpatialBlock, self).__init__()
        # number of features per sample once channels and both spatial dims are flattened
        self.flattened_size = in_channels*(res**2)
        self.net = nn.Sequential(
            nn.Linear(self.flattened_size, 512),
            nn.ReLU(),
            nn.Linear(512,512),
            nn.ReLU()
        )
    
    def forward(self, x):
        """
        x: (batch, in_channels, res, res)
        Returns a tensor of shape (batch, 512)
        """
        # reshape instead of view, so that non-contiguous inputs
        # (e.g. after a transpose or permute) are accepted too
        x = x.reshape(-1, self.flattened_size)
        return self.net(x)

In [151]:
class Inputs2D_Net(nn.Module):
    """
    Embed the last action, concatenate it to the player features and
    process the result with a 2-layer MLP (128 and 64 units, ReLU activations).

    Parameters
    ----------
    in_player: int
        Number of player features
    n_actions: int
        Size of the action vocabulary of the embedding
    embedding_dim: int
        Dimension of the action embedding
    """
    def __init__(self, in_player, n_actions, embedding_dim=10):
        super().__init__()
        hidden_features, out_features = 128, 64
        self.out_features = out_features # in case needed from outside
        # no_op action mapped to 0, which is used as padding index
        self.embedding = nn.Embedding(n_actions, embedding_dim, padding_idx=0)
        mlp_layers = [
            nn.Linear(in_player+embedding_dim, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, out_features),
            nn.ReLU(),
        ]
        self.MLP = nn.Sequential(*mlp_layers)

    def forward(self, player_info, last_action):
        """
        player_info: (batch, in_player)
        last_action: (batch,)

        Returns a tensor of shape (batch, 64)
        """
        action_embedding = self.embedding(last_action).float()
        features = torch.cat((player_info, action_embedding), dim=1)
        return self.MLP(features)

In [152]:
n_actions = len(action_names)
inputs2d_net = Inputs2D_Net(in_player, n_actions)

In [153]:
# add a batch dimension of 1 to both the player features and the last action index
player_tensor = torch.tensor(state_dict['player_features']).float().view(1, -1)
last_action_tensor = torch.tensor([last_action_idx], dtype=torch.long)
inputs2d = inputs2d_net(player_tensor, last_action_tensor)
inputs2d.shape

torch.Size([1, 64])

In [None]:
class ActorCriticHead(nn.Module):
    def __init__()