# StarCraft II - Environment setup

In [1]:
import numpy

import pysc2.env.sc2_env as env
from pysc2.agents import random_agent
from pysc2.lib import actions as sc2_actions

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
help(env)

Help on module pysc2.env.sc2_env in pysc2.env:

NAME
    pysc2.env.sc2_env - A Starcraft II environment.

CLASSES
    enum.IntEnum(builtins.int, enum.Enum)
        BotBuild
        Difficulty
        Race
    pysc2.env.environment.Base(builtins.object)
        SC2Env
    Agent(builtins.tuple)
        Agent
    Bot(builtins.tuple)
        Bot
    
    class Agent(Agent)
     |  Agent(race, name=None)
     |  
     |  Define an Agent. It can have a single race or a list of races.
     |  
     |  Method resolution order:
     |      Agent
     |      Agent
     |      builtins.tuple
     |      builtins.object
     |  
     |  Static methods defined here:
     |  
     |  __new__(cls, race, name=None)
     |      Create new instance of Agent(race, name)
     |  
     |  ----------------------------------------------------------------------
     |  Data descriptors defined here:
     |  
     |  __dict__
     |      dictionary for instance variables (if defined)
     |  
     |  ---------

In [3]:
# Single Terran player; Agent is a named tuple of (race, name).
race = env.Race.terran
agent = env.Agent(race, "Testv0")

# Settings handed to parse_agent_interface_format below.
interface_dict = {
    "feature_screen": 16,        # screen resolution in pixels
    "feature_minimap": 16,       # minimap resolution in pixels (smaller or equal to screen)
    "action_space": "FEATURES",  # either FEATURES or RGB - suggested: FEATURES
}

# Build the AgentInterfaceFormat instance from the settings above.
agent_interface_format = env.parse_agent_interface_format(**interface_dict)

# Both `players` and `agent_interface_format` are lists, even for a single player.
game_params = {
    "map_name": "CollectMineralShards",  # minigame to load ('MoveToBeacon' is the simplest one)
    "players": [agent],
    "agent_interface_format": [agent_interface_format],
}

In [4]:
game = env.SC2Env(**game_params)

In [5]:
rnd_agent = random_agent.RandomAgent()

# What is the observable?

In [6]:
obs = game.reset()

In [7]:
len(obs) # list/tuple with as many entries as there are players

1

In [8]:
obs[0] # TimeStep class' instance -> basically a named tuple

TimeStep(step_type=<StepType.FIRST: 0>, reward=0.0, discount=0.0, observation={'single_select': array([], shape=(0, 7), dtype=int32), 'multi_select': array([], shape=(0, 7), dtype=int32), 'build_queue': array([], shape=(0, 7), dtype=int32), 'cargo': array([], shape=(0, 7), dtype=int32), 'production_queue': array([], shape=(0, 2), dtype=int32), 'last_actions': array([], dtype=int32), 'cargo_slots_available': array([0], dtype=int32), 'home_race_requested': array([1], dtype=int32), 'away_race_requested': array([0], dtype=int32), 'map_name': 'CollectMineralShards', 'feature_screen': NamedNumpyArray([[[0, 0, 0, ..., 0, 0, 0],
                  [0, 0, 0, ..., 0, 0, 0],
                  [0, 0, 0, ..., 0, 0, 0],
                  ...,
                  [0, 0, 0, ..., 0, 0, 0],
                  [0, 0, 0, ..., 0, 0, 0],
                  [0, 0, 0, ..., 0, 0, 0]],

                 [[0, 0, 0, ..., 0, 0, 0],
                  [0, 2, 2, ..., 2, 2, 0],
                  [0, 2, 2, ..., 2, 2, 0],
  

In [9]:
# Show every field of the first player's TimeStep: the three scalar fields
# are each followed by a blank line, the observation dict is printed last.
first_step = obs[0]
for field in ('step_type', 'reward', 'discount'):
    print('obs[0].%s: ' % field, getattr(first_step, field), '\n')
print('obs[0].observation: ', first_step.observation)

obs[0].step_type:  StepType.FIRST 

obs[0].reward:  0.0 

obs[0].discount:  0.0 

obs[0].observation:  {'single_select': array([], shape=(0, 7), dtype=int32), 'multi_select': array([], shape=(0, 7), dtype=int32), 'build_queue': array([], shape=(0, 7), dtype=int32), 'cargo': array([], shape=(0, 7), dtype=int32), 'production_queue': array([], shape=(0, 2), dtype=int32), 'last_actions': array([], dtype=int32), 'cargo_slots_available': array([0], dtype=int32), 'home_race_requested': array([1], dtype=int32), 'away_race_requested': array([0], dtype=int32), 'map_name': 'CollectMineralShards', 'feature_screen': NamedNumpyArray([[[0, 0, 0, ..., 0, 0, 0],
                  [0, 0, 0, ..., 0, 0, 0],
                  [0, 0, 0, ..., 0, 0, 0],
                  ...,
                  [0, 0, 0, ..., 0, 0, 0],
                  [0, 0, 0, ..., 0, 0, 0],
                  [0, 0, 0, ..., 0, 0, 0]],

                 [[0, 0, 0, ..., 0, 0, 0],
                  [0, 2, 2, ..., 2, 2, 0],
                  [0

In [10]:
o = obs[0].observation
# Walk the observation dict and show the type and content of each entry.
for name, value in o.items():
    print("\nkey: ", name)
    print("value type: ", type(value))
    print("value: \n", value)


key:  single_select
value type:  <class 'numpy.ndarray'>
value: 
 []

key:  multi_select
value type:  <class 'numpy.ndarray'>
value: 
 []

key:  build_queue
value type:  <class 'numpy.ndarray'>
value: 
 []

key:  cargo
value type:  <class 'numpy.ndarray'>
value: 
 []

key:  production_queue
value type:  <class 'numpy.ndarray'>
value: 
 []

key:  last_actions
value type:  <class 'numpy.ndarray'>
value: 
 []

key:  cargo_slots_available
value type:  <class 'numpy.ndarray'>
value: 
 [0]

key:  home_race_requested
value type:  <class 'numpy.ndarray'>
value: 
 [1]

key:  away_race_requested
value type:  <class 'numpy.ndarray'>
value: 
 [0]

key:  map_name
value type:  <class 'str'>
value: 
 CollectMineralShards

key:  feature_screen
value type:  <class 'pysc2.lib.named_array.NamedNumpyArray'>
value: 
 [[[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 2 2 ... 2 2 0]
  [0 2 2 ... 2 2 0]
  .

The most important entries are probably:
1. feature_screen
2. feature_minimap
3. player
4. available_actions

## Screen and minimap features

Read more here: https://github.com/deepmind/pysc2/blob/master/docs/environment.md

In [12]:
# Summary of the screen feature layers: shape, dtype, maximum over the two
# spatial axes for each channel, global minimum, and the name -> index map.
screen = o['feature_screen']
print('feature_screen shape: ', screen.shape)
print('feature_screen dtype: ', screen.dtype)
print('feature_screen max (for each channel): \n', screen.max(axis=(1,2)))
print('feature_screen min: ', screen.min())
print('features names: \n', screen._index_names[0])

feature_screen shape:  (27, 16, 16)
feature_screen dtype:  int32
feature_screen max (for each channel): 
 [   0    2    0    0   16    3 1680    0   45  255    0    0    0    0
    2    8    0    0    0    0    0    0    0    0    1    1    0]
feature_screen min:  0
features names: 
 {'height_map': 0, 'visibility_map': 1, 'creep': 2, 'power': 3, 'player_id': 4, 'player_relative': 5, 'unit_type': 6, 'selected': 7, 'unit_hit_points': 8, 'unit_hit_points_ratio': 9, 'unit_energy': 10, 'unit_energy_ratio': 11, 'unit_shields': 12, 'unit_shields_ratio': 13, 'unit_density': 14, 'unit_density_aa': 15, 'effects': 16, 'hallucinations': 17, 'cloaked': 18, 'blip': 19, 'buffs': 20, 'buff_duration': 21, 'active': 22, 'build_progress': 23, 'pathable': 24, 'buildable': 25, 'placeholder': 26}


In [13]:
# Summary of the minimap feature layers: shape, dtype, maximum over the two
# spatial axes for each channel, global minimum, and the name -> index map.
minimap = o['feature_minimap']
print('feature_minimap shape: ', minimap.shape)
print('feature_minimap  dtype: ', minimap.dtype)
print('feature_minimap  max (for each channel): \n', minimap.max(axis=(1,2)))
print('feature_minimap  min: ', minimap.min())
print('features names: \n', minimap._index_names[0])

feature_minimap shape:  (11, 16, 16)
feature_minimap  dtype:  int32
feature_minimap  max (for each channel): 
 [ 0  2  0  1 16  3  0  0  0  1  1]
feature_minimap  min:  0
features names: 
 {'height_map': 0, 'visibility_map': 1, 'creep': 2, 'camera': 3, 'player_id': 4, 'player_relative': 5, 'selected': 6, 'unit_type': 7, 'alerts': 8, 'pathable': 9, 'buildable': 10}


It seems that each of those features is categorical, but not all of them have the same range. Also, not every layer is actually mentioned in the environment documentation; it is probably a good idea to remove some of the useless layers for very specific tasks (like the minigames).

More specifically they mention:
- 13 screen features (vs 27)
- 7 minimap features (vs 11)

**TODO:** find a way to embed these features in a continuous space, even if the vocabulary sizes are all different. Also: find out the vocabulary sizes a priori.

# Player

In [14]:
# Summary of the `player` vector: shape, dtype, raw values and the
# name -> index map of its entries.
player = o['player']
print('player shape: ', player.shape)
print('player  dtype: ', player.dtype)
print('player  value: ', player)
print('features names: \n', player._index_names[0])

player shape:  (11,)
player  dtype:  int32
player  value:  [1 0 0 2 0 2 0 0 2 0 0]
features names: 
 {'player_id': 0, 'minerals': 1, 'vespene': 2, 'food_used': 3, 'food_cap': 4, 'food_army': 5, 'food_workers': 6, 'idle_worker_count': 7, 'army_count': 8, 'warp_gate_count': 9, 'larva_count': 10}


# Available actions

In [15]:
o['available_actions'] # I guess these are the IDs of the actions that is possible to take

array([0, 1, 2, 3, 4, 7], dtype=int32)

In [16]:
help(sc2_actions)

Help on module pysc2.lib.actions in pysc2.lib:

NAME
    pysc2.lib.actions - Define the static list of types and actions for SC2.

CLASSES
    builtins.object
        Functions
    enum.Enum(builtins.object)
        ActionSpace
    enum.IntEnum(builtins.int, enum.Enum)
        ControlGroupAct
        Queued
        SelectAdd
        SelectPointAct
        SelectUnitAct
        SelectWorker
    ArgumentType(builtins.tuple)
        ArgumentType
    Arguments(builtins.tuple)
        Arguments
    Function(builtins.tuple)
        Function
    FunctionCall(builtins.tuple)
        FunctionCall
    RawArguments(builtins.tuple)
        RawArguments
    ValidActions(builtins.tuple)
        ValidActions
    
    class ActionSpace(enum.Enum)
     |  ActionSpace(value, names=None, *, module=None, qualname=None, type=None, start=1)
     |  
     |  An enumeration.
     |  
     |  Method resolution order:
     |      ActionSpace
     |      enum.Enum
     |      builtins.object
     |  
     |  Dat

How to read this: 
https://github.com/deepmind/pysc2/blob/master/docs/environment.md

Basically: `<function id>/<function name>(<type id>/<type name> [<value size>, *]; *)`

Some examples:

*   `1/move_camera (1/minimap [64, 64])` is the `move_camera` function (id `1`),
    which takes one argument named `minimap` (id `1`) which requires two ints
    each in the range `[0, 64)` which represent the coordinates on the minimap.
*   `331/Move_screen (3/queued [2]; 0/screen [84, 84])` is the `Move_screen`
    function (id `331`) which takes two arguments: `queued` (id `3`) which is a
    bool and signifies whether this action should happen now or after previous
    actions, and `screen` (id `0`) which takes two ints each in the range `[0,
    84)` which represent a pixel on the screen.

The function names should be unique, stable and meaningful. The function and
type ids are the index into the list of `functions` and `types`.

The `types` are a predefined list of argument types that can be used in a
function call. The exact definitions are in `pysc2.lib.actions.TYPES`


In [17]:
# The static FUNCTIONS list is not bound to a specific game, which is
# presumably why the screen/minimap sizes print as [0, 0]; the action_spec
# taken from the game further down reports the configured (16, 16) instead.
funcs = list(sc2_actions.FUNCTIONS)
print(*funcs, sep="\n")

0/no_op ()
1/move_camera (1/minimap [0, 0])
2/select_point (6/select_point_act [4]; 0/screen [0, 0])
3/select_rect (7/select_add [2]; 0/screen [0, 0]; 2/screen2 [0, 0])
4/select_control_group (4/control_group_act [5]; 5/control_group_id [10])
5/select_unit (8/select_unit_act [4]; 9/select_unit_id [500])
6/select_idle_worker (10/select_worker [4])
7/select_army (7/select_add [2])
8/select_warp_gates (7/select_add [2])
9/select_larva ()
10/unload (12/unload_id [500])
11/build_queue (11/build_queue_id [10])
12/Attack_screen (3/queued [2]; 0/screen [0, 0])
13/Attack_minimap (3/queued [2]; 1/minimap [0, 0])
14/Attack_Attack_screen (3/queued [2]; 0/screen [0, 0])
15/Attack_Attack_minimap (3/queued [2]; 1/minimap [0, 0])
16/Attack_AttackBuilding_screen (3/queued [2]; 0/screen [0, 0])
17/Attack_AttackBuilding_minimap (3/queued [2]; 1/minimap [0, 0])
18/Attack_Redirect_screen (3/queued [2]; 0/screen [0, 0])
19/Scan_Move_screen (3/queued [2]; 0/screen [0, 0])
20/Scan_Move_minimap (3/queued [2]; 

# How to choose an action and make a step (random agent)

```python
class BaseAgent(object):
  """A base agent to write custom scripted agents.

  It can also act as a passive agent that does nothing but no-ops.
  """

  def __init__(self):
    self.reward = 0
    self.episodes = 0
    self.steps = 0
    self.obs_spec = None
    self.action_spec = None

  def setup(self, obs_spec, action_spec):
    self.obs_spec = obs_spec
    self.action_spec = action_spec

  def reset(self):
    self.episodes += 1

  def step(self, obs):
    self.steps += 1
    self.reward += obs.reward
    return actions.FunctionCall(actions.FUNCTIONS.no_op.id, [])

class RandomAgent(base_agent.BaseAgent):
  """A random agent for starcraft."""

  def step(self, obs):
    super(RandomAgent, self).step(obs)
    function_id = numpy.random.choice(obs.observation.available_actions)
    args = [[numpy.random.randint(0, size) for size in arg.sizes]
            for arg in self.action_spec.functions[function_id].args]
    return actions.FunctionCall(function_id, args)
    
```

So, before taking the step, we need to go through the setup of the agent, passing it obs_spec and action_spec. Looking at run_loop.py in the env folder, we can obtain them like this:

In [19]:
observation_spec = game.observation_spec()[0]
action_spec = game.action_spec()[0]

In [20]:
observation_spec

{'action_result': (0,),
 'alerts': (0,),
 'build_queue': (0, 7),
 'cargo': (0, 7),
 'cargo_slots_available': (1,),
 'control_groups': (10, 2),
 'game_loop': (1,),
 'last_actions': (0,),
 'map_name': (0,),
 'multi_select': (0, 7),
 'player': (11,),
 'production_queue': (0, 2),
 'score_cumulative': (13,),
 'score_by_category': (11, 5),
 'score_by_vital': (3, 3),
 'single_select': (0, 7),
 'available_actions': (0,),
 'feature_screen': (27, 16, 16),
 'feature_minimap': (11, 16, 16),
 'upgrades': (0,),
 'home_race_requested': (1,),
 'away_race_requested': (1,)}

In [21]:
action_spec

ValidActions(types=Arguments(screen=ArgumentType(id=0, name='screen', sizes=(16, 16), fn=None, values=None, count=None), minimap=ArgumentType(id=1, name='minimap', sizes=(16, 16), fn=None, values=None, count=None), screen2=ArgumentType(id=2, name='screen2', sizes=(16, 16), fn=None, values=None, count=None), queued=ArgumentType(id=3, name='queued', sizes=(2,), fn=None, values=None, count=None), control_group_act=ArgumentType(id=4, name='control_group_act', sizes=(5,), fn=None, values=None, count=None), control_group_id=ArgumentType(id=5, name='control_group_id', sizes=(10,), fn=None, values=None, count=None), select_point_act=ArgumentType(id=6, name='select_point_act', sizes=(4,), fn=None, values=None, count=None), select_add=ArgumentType(id=7, name='select_add', sizes=(2,), fn=None, values=None, count=None), select_unit_act=ArgumentType(id=8, name='select_unit_act', sizes=(4,), fn=None, values=None, count=None), select_unit_id=ArgumentType(id=9, name='select_unit_id', sizes=(500,), fn=

In [22]:
rnd_agent.setup(observation_spec, action_spec)

In [23]:
rnd_agent.step(obs[0])

FunctionCall(function=7, arguments=[[0]])

Going more in depth...

In [24]:
# Reproduce RandomAgent.step by hand. numpy is imported in the notebook's
# first cell together with the other dependencies.

# first pick a function id among those currently available
function_id = numpy.random.choice(obs[0].observation.available_actions)
print("function_id: ", function_id)

# then look at the arguments required by that function
list_of_args = action_spec.functions[function_id].args
print('len(list of args): ', len(list_of_args))

# and the sizes (one upper bound per component) of each of those arguments
print('size of each arg: ', [a.sizes for a in list_of_args])

# finally sample every component of every argument in [0, size)
args = [[numpy.random.randint(0, size) for size in arg.sizes]
        for arg in list_of_args]
print("args: ", args)

function_id:  1
len(list of args):  1
size of each arg:  [(16, 16)]
args:  [[7, 10]]


**Important remark:** Notice that while the available actions at a given turn are passed together with the observation, the setup is done just once. This is because we need the action_spec only as a lookup table for the arguments of all the functions; it is up to the agent to select actions only among the available ones.


# Submit the action back to the environment

In [25]:
action = sc2_actions.FunctionCall(function_id, args)

In [26]:
new_obs = game.step(actions=[action])

# Playing an episode with a random agent

From env/run_loop.py

```python
import time


def run_loop(agents, env, max_frames=0, max_episodes=0):
  """A run loop to have agents and an environment interact."""
  total_frames = 0
  total_episodes = 0
  start_time = time.time()

  observation_spec = env.observation_spec()
  action_spec = env.action_spec()
  for agent, obs_spec, act_spec in zip(agents, observation_spec, action_spec):
    agent.setup(obs_spec, act_spec)

  try:
    while not max_episodes or total_episodes < max_episodes:
      total_episodes += 1
      timesteps = env.reset()
      for a in agents:
        a.reset()
      while True:
        total_frames += 1
        actions = [agent.step(timestep)
                   for agent, timestep in zip(agents, timesteps)]
        if max_frames and total_frames >= max_frames:
          return
        if timesteps[0].last():
          break
        timesteps = env.step(actions)
  except KeyboardInterrupt:
    pass
  finally:
    elapsed_time = time.time() - start_time
    print("Took %.3f seconds for %s steps: %.3f fps" % (
        elapsed_time, total_frames, total_frames / elapsed_time))
```

In [27]:
def init_game(interface_dict, max_steps_per_episode=1000,
              map_name='MoveToBeacon', **kwargs):
    """Create a single-player SC2Env with one Terran agent.

    Parameters
    ----------
    interface_dict : dict
        Keyword arguments forwarded to ``env.parse_agent_interface_format``
        (e.g. feature_screen, feature_minimap, action_space).
    max_steps_per_episode : int, optional
        Passed to SC2Env as ``game_steps_per_episode``.
    map_name : str, optional
        Name of the map to load. Defaults to 'MoveToBeacon', the value that
        was previously hard-coded, so existing calls behave as before.
    **kwargs
        Any further SC2Env keyword arguments, e.g. the replay options
        save_replay_episodes, replay_dir and replay_prefix.

    Returns
    -------
    The SC2Env instance built from these settings.
    """
    race = env.Race.terran
    agent = env.Agent(race, "Testv0")  # named tuple (race, agent_name)
    agent_interface_format = env.parse_agent_interface_format(**interface_dict)

    game_params = dict(map_name=map_name,
                       players=[agent],  # use a list even for single player
                       game_steps_per_episode=max_steps_per_episode,
                       agent_interface_format=[agent_interface_format]  # use a list even for single player
                       )
    return env.SC2Env(**game_params, **kwargs)

In [29]:
# Interface settings used for the games created with init_game.
interface_dict = {
    "feature_screen": 16,        # screen resolution in pixels
    "feature_minimap": 16,       # minimap resolution in pixels (smaller or equal to screen)
    "action_space": "FEATURES",  # either FEATURES or RGB - suggested: FEATURES
}

In [31]:
# Bind the new environment to `game`, not `env`: `env` is the alias of the
# pysc2.env.sc2_env module that init_game itself uses, so rebinding it would
# break every later init_game call.
game = init_game(interface_dict)
# Build the agent for this game the same way the replay section does, so the
# training call that follows operates on a matching game/agent pair.
agent = init_rnd_agent(game.observation_spec()[0], game.action_spec()[0])

In [32]:
train_agent(agent, game, max_frames=0, max_episodes=2)

Update agent
Update agent
Took 1.955 seconds for 250 steps: 127.862 fps


Frame per second (screen_res, minimap_res) <br>
85.610 fps (64,64) - 92.302 fps (64,16) - 111.974 fps (16,16)


# Saving replays

save_replay_episodes: Save a replay after this many episodes. Default of 0
             means don't save replays. <br>
replay_dir: Directory to save replays. Required with save_replay_episodes. Default None. <br>
replay_prefix: An optional prefix to use when saving replays. Default None.

OBS: the parent directory assumed is ~/StarCraftII/Replays

To watch a replay: <br>
```bash
python -m pysc2.bin.play --replay <path-to-replay>
```

In [33]:
# Replay options forwarded to SC2Env through init_game's **kwargs.
replay_dict = {
    "save_replay_episodes": 2,
    "replay_dir": 'Replays/',
    "replay_prefix": 'random',
}

In [34]:
game = init_game(interface_dict, **replay_dict)
agent = init_rnd_agent(game.observation_spec()[0], game.action_spec()[0])

In [35]:
train_agent(agent, game, max_frames=0, max_episodes=10)

Update agent
Update agent
Update agent
Update agent
Update agent
Update agent
Update agent
Update agent
Update agent
Update agent
Took 9.972 seconds for 1250 steps: 125.351 fps


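To see one of the replays, run something like this from a terminal (not with `!` inside the notebook: each `!` command runs in its own subshell, so the `conda activate` would not carry over to the next line):

```bash
conda activate <sc2_virtual_env>
python -m pysc2.bin.play --replay "Replays/test1__2020-04-09-06-37-27.SC2Replay"
```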

Main problem:
- `/pysc2/run_configs/__init__.py` needs the addition of two lines to correctly parse the flags before initializing the environment. If the flags are not parsed, an error is raised and the whole initialization fails. <br>
    `import sys` <br>
    `FLAGS(sys.argv)` <br>
- `/pysc2/bin/play.py` doesn't work if those changes are retained in the init file, so they should be disabled when playing replays.

Possible lines of solution:
- find a way to make the changes in `__init__.py` compatible with both cases
- create a virtual environment just for replays with the original version (and keep the current one for development and training)

In [1]:
#A2C_2020-05-28-06-12-42.SC2Replay