In [50]:
import numpy as np
import torch

from importlib import reload
from RelationalModule import MLP_AC_networks as net

from pysc2.agents import base_agent
from pysc2.lib import actions
from pysc2.lib import features
from pysc2.env import sc2_env, run_loop, available_actions_printer
from pysc2 import maps
from absl import flags

# Indexes of the screen feature layers read by the cells below
_PLAYER_RELATIVE = features.SCREEN_FEATURES.player_relative.index 
_SELECTED = features.SCREEN_FEATURES.selected.index

# Identifiers found in the player_relative feature layer
_BACKGROUND = 0
_PLAYER_FRIENDLY = 1
_PLAYER_ALLIES = 2
_PLAYER_NEUTRAL = 3
_PLAYER_HOSTILE = 4

# Ids of the actions that we'll use
_NO_OP = actions.FUNCTIONS.no_op.id
# NOTE(review): despite its name, _MOVE_SCREEN is bound to Attack_screen,
# not Move_screen — confirm this is intended.
_MOVE_SCREEN = actions.FUNCTIONS.Attack_screen.id
_SELECT_ARMY = actions.FUNCTIONS.select_army.id

# Meaning of some arguments required by the actions
_SELECT_ALL = [0]   # argument passed to select_army
_NOT_QUEUED = [0]   # presumably the "queued" argument of screen actions — TODO confirm

In [5]:
# Single-player setup: one Terran agent (Race.terran is the enum member with value 1)
race = sc2_env.Race.terran
agent = sc2_env.Agent(race, "Testv0")  # NamedTuple [race, agent_name]

# Observation / action interface requested from the game
interface_dict = {
    "feature_screen": 16,        # screen resolution in pixel
    "feature_minimap": 16,       # minimap resolution in pixel (smaller or equal to screen)
    "action_space": "FEATURES",  # either FEATURES or RGB - suggested: FEATURES
}

# AgentInterfaceFormat instance built from the settings above
agent_interface_format = sc2_env.parse_agent_interface_format(**interface_dict)

# Keyword arguments for SC2Env; list-valued entries hold one item per player
game_params = {
    "map_name": "MoveToBeacon",                          # simplest minigame
    "players": [agent],                                  # use a list even for single player
    "agent_interface_format": [agent_interface_format],  # use a list even for single player
}

In [6]:
# Create the environment from the game parameters defined above
env = sc2_env.SC2Env(**game_params)

## Defining a high-level state

The most relevant pieces of information are certainly the position of the marine and the position of the center of the beacon. It may also be useful to have a boolean feature telling us whether the beacon exists on the map or not, and another flag telling us whether the marine is selected or not (so that, instead of relying only on the final mask of available actions, the agent can learn that some actions, e.g. select army, are more valuable when no unit is selected).

In [46]:
def get_state(obs):
    """Build the 6-dimensional high-level state from a pysc2 observation.

    Parameters
    ----------
    obs : sequence
        Per-agent timesteps as returned by the environment; only ``obs[0]`` is
        read, through ``obs[0].observation['feature_screen']``.

    Returns
    -------
    numpy.ndarray
        Float array ``[player_x, player_y, beacon_x, beacon_y, beacon_exists,
        is_selected]``. Positions are the mean coordinates of the matching
        cells; ``[-1., -1.]`` is used when no matching cell is on screen.
    """
    feature_screen = obs[0].observation['feature_screen']
    player_relative = feature_screen[_PLAYER_RELATIVE]

    player_ys, player_xs = (player_relative == _PLAYER_FRIENDLY).nonzero()
    if player_ys.size > 0:
        player_pos = [player_xs.mean(), player_ys.mean()]
    else:
        # Same sentinel as the beacon: avoids the NaN produced by an empty mean.
        player_pos = [-1., -1.]

    beacon_ys, beacon_xs = (player_relative == _PLAYER_NEUTRAL).nonzero()
    # Test the number of matches, not the index values: `beacon_ys.any()` is
    # False when every matching cell lies on row 0.
    beacon_found = beacon_ys.size > 0
    if beacon_found:
        beacon_pos = [beacon_xs.mean(), beacon_ys.mean()]
    else:
        beacon_pos = [-1., -1.]

    beacon_exists = float(beacon_found)

    selected = feature_screen[_SELECTED]
    # Test the mask itself for the same reason (a unit on row 0 must count).
    is_selected = float(np.any(selected == 1))

    state = np.concatenate([player_pos, beacon_pos, [beacon_exists, is_selected]])

    return state

In [35]:
# Start a new episode and keep the initial observation
obs = env.reset()

In [36]:
# Extract the player_relative layer of the screen features and display it
player_relative = obs[0].observation['feature_screen'][_PLAYER_RELATIVE]
player_relative

NamedNumpyArray([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 3, 3, 3, 0, 0, 0, 1, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

How to read:
- 0 stands for background cells
- 1 for cells owned by friendly units and buildings
- 3 for cells occupied by neutral units or objects (the beacon in our case)

Observation: interestingly enough, a map of 16 by 16 is good enough to represent both our unit and the beacon, so there is no need to consider greater resolutions.

In [41]:
# Extract the `selected` layer of the initial observation and display it
selected = obs[0].observation['feature_screen'][_SELECTED]
selected

NamedNumpyArray([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [43]:
# 1.0 when any cell of the `selected` layer equals 1, else 0.0.
# The mask itself is tested: testing the row indices returned by nonzero()
# would miss a selected unit lying on row 0 (index value 0 is falsy).
is_selected = float(np.any(selected == 1))
is_selected

0.0

As we can see, no unit is selected at the beginning...

In [38]:
# Issue select_army with the _SELECT_ALL argument and step the environment once
action = actions.FunctionCall(_SELECT_ARMY, [_SELECT_ALL])
new_obs = env.step(actions=[action])

In [44]:
# `selected` layer of the observation obtained after the select_army step
selected = new_obs[0].observation['feature_screen'][_SELECTED]
selected

NamedNumpyArray([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [45]:
# 1.0 when any cell of the `selected` layer equals 1, else 0.0.
# The mask itself is tested: testing the row indices returned by nonzero()
# would miss a selected unit lying on row 0 (index value 0 is falsy).
is_selected = float(np.any(selected == 1))
is_selected

1.0

## Actions

Inspecting the action specification for this map, one can see that it actually lists all the actions, so it does not help in defining the action space. Moreover, the available actions change with the state, which basically means whether our unit is selected or not.

Reduce the action space to 3 moves:
1. _NO_OP
2. _SELECT_ARMY
3. _MOVE_SCREEN

_MOVE_SCREEN is the only one of the three that can be unavailable: this happens when our unit is not selected.

We are going to compute a custom mask starting from the available actions and the ids of these 3 actions.

In [96]:
def get_scripted_arguments(action_id, obs):
    """Return the scripted argument list for one of the three supported actions.

    Parameters
    ----------
    action_id : int
        Id of the chosen action, compared against ``_SELECT_ARMY`` and
        ``_MOVE_SCREEN``; any other id is treated as taking no arguments.
    obs : sequence
        Per-agent timesteps; only
        ``obs[0].observation['feature_screen'][_PLAYER_RELATIVE]`` is read,
        and only for the screen action.

    Returns
    -------
    list
        ``[_SELECT_ALL]`` for select_army, ``[_NOT_QUEUED, [x, y]]`` for the
        screen action, ``[]`` otherwise. The target ``[x, y]`` is the beacon
        centre, else the unit's own position, else the screen centre.
    """
    if action_id == _SELECT_ARMY:
        return [_SELECT_ALL]

    if action_id == _MOVE_SCREEN:
        player_relative = obs[0].observation['feature_screen'][_PLAYER_RELATIVE]

        beacon_ys, beacon_xs = (player_relative == _PLAYER_NEUTRAL).nonzero()
        # `.size`, not `.any()`: index value 0 is falsy, so a beacon lying
        # only on row 0 would otherwise be treated as missing.
        if beacon_ys.size > 0:
            coord = [int(beacon_xs.mean()), int(beacon_ys.mean())]
        else:
            # Only look for our own unit when it is needed as a fallback
            # target; int(mean) of an empty selection would raise.
            player_ys, player_xs = (player_relative == _PLAYER_FRIENDLY).nonzero()
            if player_ys.size > 0:
                coord = [int(player_xs.mean()), int(player_ys.mean())]
            else:
                height, width = player_relative.shape
                coord = [width // 2, height // 2]

        # Screen commands take two arguments: the queued flag and the target.
        return [_NOT_QUEUED, coord]

    return []

In [115]:
# Re-import the network module so that edits to its source are picked up
# without restarting the kernel
reload(net)

<module 'RelationalModule.MLP_AC_networks' from '/home/nicola/Nicola_unipd/MasterThesis/SC2-RL/RelationalModule/MLP_AC_networks.py'>

In [116]:
# Three actions (no-op, select army, move screen) and the six features
# produced by get_state
action_space = 3
observation_space = 6
actor = net.Actor(action_space, observation_space)

In [138]:
# Begin a fresh episode and turn its first observation into the actor's inputs
obs = env.reset()

state = get_state(obs)
print("state: ", state)
# The actor works on float32 tensors
state = torch.tensor(state, dtype=torch.float32)

# Actions currently allowed by the game, handed to the actor together with the state
aa = obs[0].observation.available_actions

state:  [14.  3.  4.  5.  1.  0.]


In [139]:
# Forward pass of the actor on the current state; the available actions are
# passed along — presumably to mask unavailable ones (confirm in MLP_AC_networks)
log_probs = actor(state, aa)
log_probs

tensor([-0.4653, -0.9888,    -inf], grad_fn=<LogSoftmaxBackward>)

In [140]:
from torch.distributions import Categorical

In [141]:
# Turn the log-probabilities back into probabilities and draw one action index
probs = log_probs.exp()
distribution = Categorical(probs=probs)
sampled = distribution.sample()
a = sampled.item()
print("action: ", a)

action:  0


In [142]:
# Translate the sampled index into an action id through the actor's action_dict
action_id = actor.action_dict[a]
print("action_id: ", action_id)

action_id:  _Functions.no_op


In [143]:
# Compute the scripted arguments for the chosen action and wrap both into a FunctionCall
args = get_scripted_arguments(action_id, obs)
print("args: ", args)
action = actions.FunctionCall(action_id, args)

args:  []


In [144]:
# Advance the environment by one step; actions are passed as a list, one per agent
obs = env.step([action])

In [145]:
# High-level state computed from the observation returned by the step
state = get_state(obs)
state

array([14.,  3.,  4.,  5.,  1.,  0.])

So all this part should be included in the get_action method of the actor critic.