# PySC2 tutorial

In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import importlib
import threading

from absl import app
from absl import flags
from future.builtins import range  # pylint: disable=redefined-builtin

from pysc2 import maps
from pysc2.env import available_actions_printer
from pysc2.env import run_loop
from pysc2.env import sc2_env
from pysc2.lib import point_flag
from pysc2.lib import stopwatch

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
import os
# Set the SC2PATH environment variable to the local StarCraft II install
# directory (presumably read by PySC2 when it launches the game).
# NOTE(review): the launch logs further down and a later cell use
# /home/nsml/StarCraftII instead -- confirm which path is the right one.
os.environ["SC2PATH"] = "/home/AI_core/StarCraftII"

In [18]:
# Sanity check: resolve an agent class from its module path and class name,
# using the same importlib/getattr lookup that main() performs for --agent.
getattr(importlib.import_module('pysc2.agents.random_agent'), 'RandomAgent')

pysc2.agents.random_agent.RandomAgent

In [3]:
# PySC2 is built on Abseil, which expects the code to be run as a
# command-line application. That does not play well with a Jupyter notebook,
# so sys.argv is monkeypatched here to supply the arguments a command-line
# invocation would have carried (the required --map flag).


import sys
sys.argv = ["python", "--map", "Simple64"]

# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Run an agent."""

FLAGS = flags.FLAGS

# Guard the flag definitions so this cell can be re-executed in a notebook:
# `flags_defined` is assigned at the end of the cell, so the DEFINE_* calls
# below only run the first time (absl presumably rejects defining the same
# flag twice -- TODO confirm).
if "flags_defined" not in globals():
    # Rendering and observation-format flags.
    flags.DEFINE_bool("render", True, "Whether to render with pygame.")
    point_flag.DEFINE_point("feature_screen_size", "84",
                            "Resolution for screen feature layers.")
    point_flag.DEFINE_point("feature_minimap_size", "64",
                            "Resolution for minimap feature layers.")
    point_flag.DEFINE_point("rgb_screen_size", None,
                            "Resolution for rendered screen.")
    point_flag.DEFINE_point("rgb_minimap_size", None,
                            "Resolution for rendered minimap.")
    flags.DEFINE_enum("action_space", None, sc2_env.ActionSpace._member_names_,  # pylint: disable=protected-access
                      "Which action space to use. Needed if you take both feature "
                      "and rgb observations.")
    flags.DEFINE_bool("use_feature_units", False,
                      "Whether to include feature units.")
    flags.DEFINE_bool("disable_fog", False, "Whether to disable Fog of War.")

    # Run-length flags.
    flags.DEFINE_integer("max_agent_steps", 0, "Total agent steps.")
    flags.DEFINE_integer("game_steps_per_episode", None, "Game steps per episode.")
    flags.DEFINE_integer("max_episodes", 0, "Total episodes.")
    flags.DEFINE_integer("step_mul", 8, "Game steps per agent step.")

    # First player: always an agent class given as a dotted python path.
    flags.DEFINE_string("agent", "pysc2.agents.random_agent.RandomAgent",
                        "Which agent to run, as a python path to an Agent class.")
    flags.DEFINE_enum("agent_race", "random", sc2_env.Race._member_names_,  # pylint: disable=protected-access
                      "Agent 1's race.")

    # Second player: the literal "Bot" or another agent class path.
    flags.DEFINE_string("agent2", "Bot", "Second agent, either Bot or agent class.")
    flags.DEFINE_enum("agent2_race", "random", sc2_env.Race._member_names_,  # pylint: disable=protected-access
                      "Agent 2's race.")
    flags.DEFINE_enum("difficulty", "very_easy", sc2_env.Difficulty._member_names_,  # pylint: disable=protected-access
                      "If agent2 is a built-in Bot, it's strength.")

    # Diagnostics and parallelism.
    flags.DEFINE_bool("profile", False, "Whether to turn on code profiling.")
    flags.DEFINE_bool("trace", False, "Whether to trace the code execution.")
    flags.DEFINE_integer("parallel", 1, "How many instances to run in parallel.")

    flags.DEFINE_bool("save_replay", True, "Whether to save a replay at the end.")

    # --map has no default and is the only required flag.
    flags.DEFINE_string("map", None, "Name of a map to use.")
    flags.mark_flag_as_required("map")

# Marker checked by the guard above on later executions of this cell.
flags_defined = True

def run_thread(agent_classes, players, map_name, visualize):
  """Play the given agents inside one SC2 environment instance.

  The environment is configured from the command-line flags. Once the run
  loop returns, a replay named after the first agent class is saved when the
  save_replay flag is set.

  Args:
    agent_classes: agent classes; one instance of each is created.
    players: player specifications handed to SC2Env.
    map_name: name of the map to load.
    visualize: passed through to SC2Env's `visualize` argument.
  """
  interface_format = sc2_env.parse_agent_interface_format(
      feature_screen=FLAGS.feature_screen_size,
      feature_minimap=FLAGS.feature_minimap_size,
      rgb_screen=FLAGS.rgb_screen_size,
      rgb_minimap=FLAGS.rgb_minimap_size,
      action_space=FLAGS.action_space,
      use_feature_units=FLAGS.use_feature_units)
  env_kwargs = dict(
      map_name=map_name,
      players=players,
      agent_interface_format=interface_format,
      step_mul=FLAGS.step_mul,
      game_steps_per_episode=FLAGS.game_steps_per_episode,
      disable_fog=FLAGS.disable_fog,
      visualize=visualize)

  with sc2_env.SC2Env(**env_kwargs) as raw_env:
    # Wrap the environment so that available actions get printed during play.
    printing_env = available_actions_printer.AvailableActionsPrinter(raw_env)
    instances = [make_agent() for make_agent in agent_classes]
    run_loop.run_loop(
        instances, printing_env, FLAGS.max_agent_steps, FLAGS.max_episodes)
    if FLAGS.save_replay:
      printing_env.save_replay(agent_classes[0].__name__)


def main(unused_argv):
  """Resolve the agents from the flags and run them, optionally in parallel.

  The first player is always the class named by --agent. When the map has at
  least two players, the second is either a built-in bot or the class named
  by --agent2. `--parallel - 1` extra non-rendering threads are started next
  to the run executed in the calling thread.
  """

  def load_agent_class(dotted_path):
    """Import `pkg.module.ClassName` and return the class object."""
    module_path, class_name = dotted_path.rsplit(".", 1)
    return getattr(importlib.import_module(module_path), class_name)

  if FLAGS.profile or FLAGS.trace:
    stopwatch.sw.enable()
  if FLAGS.trace:
    stopwatch.sw.trace()

  map_inst = maps.get(FLAGS.map)

  agent_classes = [load_agent_class(FLAGS.agent)]
  players = [sc2_env.Agent(sc2_env.Race[FLAGS.agent_race])]

  if map_inst.players >= 2:
    if FLAGS.agent2 == "Bot":
      opponent = sc2_env.Bot(sc2_env.Race[FLAGS.agent2_race],
                             sc2_env.Difficulty[FLAGS.difficulty])
    else:
      agent_classes.append(load_agent_class(FLAGS.agent2))
      opponent = sc2_env.Agent(sc2_env.Race[FLAGS.agent2_race])
    players.append(opponent)

  # The background runs never render; only the foreground run honours --render.
  workers = [
      threading.Thread(target=run_thread,
                       args=(agent_classes, players, FLAGS.map, False))
      for _ in range(FLAGS.parallel - 1)
  ]
  for worker in workers:
    worker.start()

  run_thread(agent_classes, players, FLAGS.map, FLAGS.render)

  for worker in workers:
    worker.join()

  if FLAGS.profile:
    print(stopwatch.sw)


def entry_point():  # Needed so setup.py scripts work.
  """Hand main() to absl's app.run; takes no arguments of its own."""
  app.run(main)


# Script entry point.
# NOTE(review): inside a notebook __name__ is also "__main__", so executing
# the cell starts the run immediately; the SystemExit seen in later cell
# outputs presumably comes from app.run exiting afterwards -- confirm.
if __name__ == "__main__":
  app.run(main)

I1203 10:07:31.491242 140670063929152 sc_process.py:135] Launching SC2: /home/nsml/StarCraftII/Versions/Base59877/SC2_x64 -listen 127.0.0.1 -port 19953 -dataDir /home/nsml/StarCraftII/ -tempDir /tmp/sc-l1kwwsac/
I1203 10:07:31.499730 140670063929152 remote_controller.py:167] Connecting to: ws://127.0.0.1:19953/sc2api, attempt: 0, running: True
I1203 10:07:32.507134 140670063929152 remote_controller.py:167] Connecting to: ws://127.0.0.1:19953/sc2api, attempt: 1, running: True
I1203 10:07:33.512880 140670063929152 remote_controller.py:167] Connecting to: ws://127.0.0.1:19953/sc2api, attempt: 2, running: True
E1203 10:12:00.079802 140670063929152 renderer_human.py:242] Failed to get the resolution from xrandr.
E1203 10:12:00.081105 140670063929152 renderer_human.py:399] ------------------------------------------------------------
E1203 10:12:00.081902 140670063929152 renderer_human.py:400] Failed to initialize pygame: video system not initialized
E1203 10:12:00.084350 140670063929152 rend

   0/no_op                                              ()
   1/move_camera                                        (1/minimap [64, 64])
   2/select_point                                       (6/select_point_act [4]; 0/screen [84, 84])
   3/select_rect                                        (7/select_add [2]; 0/screen [84, 84]; 2/screen2 [84, 84])
   4/select_control_group                               (4/control_group_act [5]; 5/control_group_id [10])
   5/select_unit                                        (8/select_unit_act [4]; 9/select_unit_id [500])
 453/Stop_quick                                         (3/queued [2])
 549/Effect_Spray_minimap                               (3/queued [2]; 1/minimap [64, 64])
 230/Effect_Spray_screen                                (3/queued [2]; 0/screen [84, 84])
 264/Harvest_Gather_screen                              (3/queued [2]; 0/screen [84, 84])
 451/Smart_screen                                       (3/queued [2]; 0/screen [84, 84])
 331/Mo

I1203 10:12:19.071764 140670063929152 sc2_env.py:725] Episode 1 finished after 11248 game steps. Outcome: [-1], reward: [-1], score: [145]
I1203 10:12:22.734069 140670063929152 sc2_env.py:507] Starting episode 2: [random, random] on Simple64
I1203 10:12:38.865216 140670063929152 sc2_env.py:725] Episode 2 finished after 9272 game steps. Outcome: [-1], reward: [-1], score: [100]
I1203 10:12:42.847983 140670063929152 sc2_env.py:507] Starting episode 3: [random, random] on Simple64


  51/Build_EvolutionChamber_screen                      (3/queued [2]; 0/screen [84, 84])
  84/Build_SpawningPool_screen                          (3/queued [2]; 0/screen [84, 84])
 483/Train_Overlord_quick                               (3/queued [2])
  11/build_queue                                        (11/build_queue_id [10])


I1203 10:13:04.377730 140670063929152 sc2_env.py:725] Episode 3 finished after 13008 game steps. Outcome: [-1], reward: [-1], score: [335]
I1203 10:13:08.370600 140670063929152 sc2_env.py:507] Starting episode 4: [random, random] on Simple64


  59/Build_Hatchery_screen                              (3/queued [2]; 0/screen [84, 84])


I1203 10:13:24.712034 140670063929152 sc2_env.py:725] Episode 4 finished after 9664 game steps. Outcome: [-1], reward: [-1], score: [1360]
I1203 10:13:28.878094 140670063929152 sc2_env.py:507] Starting episode 5: [random, random] on Simple64
I1203 10:13:47.873922 140670063929152 sc2_env.py:725] Episode 5 finished after 9088 game steps. Outcome: [-1], reward: [-1], score: [130]
I1203 10:13:51.672867 140670063929152 sc2_env.py:507] Starting episode 6: [random, random] on Simple64


  44/Build_CommandCenter_screen                         (3/queued [2]; 0/screen [84, 84])
  50/Build_EngineeringBay_screen                        (3/queued [2]; 0/screen [84, 84])
  79/Build_Refinery_screen                              (3/queued [2]; 0/screen [84, 84])
  91/Build_SupplyDepot_screen                           (3/queued [2]; 0/screen [84, 84])
 220/Effect_Repair_screen                               (3/queued [2]; 0/screen [84, 84])
 221/Effect_Repair_autocast                             ()
 281/Lift_quick                                         (3/queued [2])
 294/LoadAll_quick                                      (3/queued [2])
 490/Train_SCV_quick                                    (3/queued [2])
 261/Halt_quick                                         (3/queued [2])
  42/Build_Barracks_screen                              (3/queued [2]; 0/screen [84, 84])
  64/Build_MissileTurret_screen                         (3/queued [2]; 0/screen [84, 84])
 318/Morph_SupplyDepot_Lowe

I1203 10:14:10.165261 140670063929152 sc2_env.py:752] Environment Close
I1203 10:14:10.171051 140670063929152 sc_process.py:232] Shutdown gracefully.
I1203 10:14:10.172873 140670063929152 sc_process.py:210] Shutdown with return code: -2


Took 130.038 seconds for 7942 steps: 61.075 fps


ConnectionError: Error during save_replay: Socket error: [Errno 104] Connection reset by peer

In [11]:
!echo $DISPLAY

192.168.1.10:0.0


In [12]:
!xrandr

Screen 0: minimum 0 x 0, current 1920 x 1080, maximum 32767 x 32767
default connected primary 1920x1080+0+0 509mm x 571mm
   1920x1080      0.00* 


In [1]:
%tb

No traceback available to show.


In [7]:
from pysc2.agents import base_agent

class A3CAgent(base_agent.BaseAgent):
    """Placeholder agent: despite its name, every step returns a no-op action."""

    def step(self, obs):
        """Forward the observation to BaseAgent.step and return no_op."""
        super(A3CAgent, self).step(obs)

        # NOTE(review): `actions` is not imported in this cell (only
        # base_agent is); it is imported by the following cell, which must
        # have been executed before step() is first called.
        return actions.FUNCTIONS.no_op()

In [8]:
from pysc2.lib import actions, features

def main(unused_argv):
    """Run A3CAgent (zerg) on AbyssalReef against a very-easy random-race bot.

    A fresh environment is created for each episode and the agent is stepped
    until the episode's last timestep; this repeats until the user interrupts
    with Ctrl-C, which is swallowed so that the function returns quietly.
    """
    agent = A3CAgent()
    try:
        while True:
            participants = [
                sc2_env.Agent(sc2_env.Race.zerg),
                sc2_env.Bot(sc2_env.Race.random, sc2_env.Difficulty.very_easy),
            ]
            interface = features.AgentInterfaceFormat(
                feature_dimensions=features.Dimensions(screen=84, minimap=64))

            with sc2_env.SC2Env(map_name="AbyssalReef",
                                players=participants,
                                agent_interface_format=interface,
                                step_mul=16,
                                game_steps_per_episode=0,
                                visualize=True) as env:
                agent.setup(env.observation_spec(), env.action_spec())

                timesteps = env.reset()
                agent.reset()

                while True:
                    current = timesteps[0]
                    # The agent also sees the final timestep before the loop ends.
                    chosen = agent.step(current)
                    if current.last():
                        break
                    timesteps = env.step([chosen])

    except KeyboardInterrupt:
        pass

# Start the A3CAgent loop defined above through absl's app.run.
# NOTE(review): in a notebook this runs as soon as the cell is executed.
if __name__ == "__main__":
    app.run(main)

I1202 10:41:57.492174 140050757158720 sc_process.py:135] Launching SC2: /home/nsml/StarCraftII/Versions/Base59877/SC2_x64 -listen 127.0.0.1 -port 17523 -dataDir /home/nsml/StarCraftII/ -tempDir /tmp/sc-9vof9lfs/
I1202 10:41:57.504985 140050757158720 remote_controller.py:167] Connecting to: ws://127.0.0.1:17523/sc2api, attempt: 0, running: True
I1202 10:41:58.509291 140050757158720 remote_controller.py:167] Connecting to: ws://127.0.0.1:17523/sc2api, attempt: 1, running: True
I1202 10:41:59.514740 140050757158720 remote_controller.py:167] Connecting to: ws://127.0.0.1:17523/sc2api, attempt: 2, running: True
I1202 10:42:08.272984 140050757158720 sc2_env.py:314] Environment is ready
I1202 10:42:08.273876 140050757158720 sc2_env.py:507] Starting episode 1: [zerg, random] on AbyssalReef
Exception in thread Renderer:
Traceback (most recent call last):
  File "/usr/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.6/threading.py", line 864, in 

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


# Tests

In [6]:
import sys
# Fake the command line for this notebook session so that the required
# --map flag is supplied when flags are parsed from sys.argv.
sys.argv = ["python", "--map", "Simple64"]

# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Run an agent."""

FLAGS = flags.FLAGS

# Guard the flag definitions so this cell can be re-executed in a notebook:
# `flags_defined` is assigned at the end of the cell, so the DEFINE_* calls
# below only run the first time. If `flags_defined` already exists in this
# kernel (an identical cell appears earlier in the notebook), the whole block
# is skipped.
if "flags_defined" not in globals():
    # Rendering and observation-format flags.
    flags.DEFINE_bool("render", True, "Whether to render with pygame.")
    point_flag.DEFINE_point("feature_screen_size", "84",
                            "Resolution for screen feature layers.")
    point_flag.DEFINE_point("feature_minimap_size", "64",
                            "Resolution for minimap feature layers.")
    point_flag.DEFINE_point("rgb_screen_size", None,
                            "Resolution for rendered screen.")
    point_flag.DEFINE_point("rgb_minimap_size", None,
                            "Resolution for rendered minimap.")
    flags.DEFINE_enum("action_space", None, sc2_env.ActionSpace._member_names_,  # pylint: disable=protected-access
                      "Which action space to use. Needed if you take both feature "
                      "and rgb observations.")
    flags.DEFINE_bool("use_feature_units", False,
                      "Whether to include feature units.")
    flags.DEFINE_bool("disable_fog", False, "Whether to disable Fog of War.")

    # Run-length flags.
    flags.DEFINE_integer("max_agent_steps", 0, "Total agent steps.")
    flags.DEFINE_integer("game_steps_per_episode", None, "Game steps per episode.")
    flags.DEFINE_integer("max_episodes", 0, "Total episodes.")
    flags.DEFINE_integer("step_mul", 8, "Game steps per agent step.")

    # First player: always an agent class given as a dotted python path.
    flags.DEFINE_string("agent", "pysc2.agents.random_agent.RandomAgent",
                        "Which agent to run, as a python path to an Agent class.")
    flags.DEFINE_enum("agent_race", "random", sc2_env.Race._member_names_,  # pylint: disable=protected-access
                      "Agent 1's race.")

    # Second player: the literal "Bot" or another agent class path.
    flags.DEFINE_string("agent2", "Bot", "Second agent, either Bot or agent class.")
    flags.DEFINE_enum("agent2_race", "random", sc2_env.Race._member_names_,  # pylint: disable=protected-access
                      "Agent 2's race.")
    flags.DEFINE_enum("difficulty", "very_easy", sc2_env.Difficulty._member_names_,  # pylint: disable=protected-access
                      "If agent2 is a built-in Bot, it's strength.")

    # Diagnostics and parallelism.
    flags.DEFINE_bool("profile", False, "Whether to turn on code profiling.")
    flags.DEFINE_bool("trace", False, "Whether to trace the code execution.")
    flags.DEFINE_integer("parallel", 1, "How many instances to run in parallel.")

    flags.DEFINE_bool("save_replay", True, "Whether to save a replay at the end.")

    # --map has no default and is the only required flag.
    flags.DEFINE_string("map", None, "Name of a map to use.")
    flags.mark_flag_as_required("map")

# Marker checked by the guard above on later executions of this cell.
flags_defined = True

In [7]:
# Names of the high-level actions the Q-learning agent can choose from.
ACTION_DO_NOTHING = 'donothing'
ACTION_SELECT_SCV = 'selectscv'
ACTION_BUILD_SUPPLY_DEPOT = 'buildsupplydepot'
ACTION_BUILD_BARRACKS = 'buildbarracks'
ACTION_SELECT_BARRACKS = 'selectbarracks'
ACTION_BUILD_MARINE = 'buildmarine'
ACTION_SELECT_ARMY = 'selectarmy'
ACTION_ATTACK = 'attack'

# The parameterless actions come first; the attack variants are added below.
smart_actions = [
    ACTION_DO_NOTHING, ACTION_SELECT_SCV,
    ACTION_BUILD_SUPPLY_DEPOT, ACTION_BUILD_BARRACKS,
    ACTION_SELECT_BARRACKS, ACTION_BUILD_MARINE,
    ACTION_SELECT_ARMY,
]

# One 'attack_<x>_<y>' action per cell of a 4x4 grid over the 0..63
# coordinate range: every 16th coordinate (15, 31, 47, 63) is shifted back by
# 8, giving targets 7, 23, 39 and 55 on each axis.
for mm_x in range(15, 64, 16):
    for mm_y in range(15, 64, 16):
        smart_actions.append(
            '_'.join([ACTION_ATTACK, str(mm_x - 8), str(mm_y - 8)]))

In [8]:
class QLearningTable:
    """Tabular Q-learning with an epsilon-greedy action choice.

    The table is a pandas DataFrame with one row per visited state (indexed
    by the state key, e.g. a string) and one float column per action.

    Args:
        actions: list of action labels; they become the table's columns.
        learning_rate: step size applied to each Q-value update.
        reward_decay: discount factor (gamma) for the next state's best value.
        e_greedy: probability of picking the best known action rather than
            a random one.
    """

    def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
        self.actions = actions
        self.lr = learning_rate
        self.gamma = reward_decay
        self.epsilon = e_greedy
        self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)

    def choose_action(self, observation):
        """Return an action label for the state key `observation`.

        With probability `epsilon` the highest-valued action is returned,
        otherwise a uniformly random one. Unknown states are added first.
        """
        self.check_state_exist(observation)

        if np.random.uniform() < self.epsilon:
            # Exploit: take the best action for this state.
            state_action = self.q_table.loc[observation, :]

            # Shuffle first so that ties are not always resolved in favour of
            # the left-most column (idxmax returns the first maximum).
            state_action = state_action.reindex(np.random.permutation(state_action.index))

            action = state_action.idxmax()
        else:
            # Explore: take a random action.
            action = np.random.choice(self.actions)

        return action

    def learn(self, s, a, r, s_):
        """Apply one Q-learning update for the transition (s, a, r, s_).

        Args:
            s: key of the state the action was taken in.
            a: label of the action taken.
            r: reward received.
            s_: key of the resulting state.
        """
        self.check_state_exist(s_)
        self.check_state_exist(s)

        q_predict = self.q_table.loc[s, a]
        q_target = r + self.gamma * self.q_table.loc[s_, :].max()

        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)

    def check_state_exist(self, state):
        """Add an all-zero row for `state` if the table does not have one yet."""
        if state in self.q_table.index:
            return

        # DataFrame.append was removed in pandas 2.0, so build the new row as
        # a one-row float frame and concatenate it instead.
        new_row = pd.DataFrame(
            [[0.0] * len(self.actions)],
            index=[state],
            columns=self.q_table.columns,
            dtype=np.float64,
        )
        if len(self.q_table.index) == 0:
            # Nothing to concatenate with yet; this also sidesteps pandas'
            # special-casing of empty frames in concat.
            self.q_table = new_row
        else:
            self.q_table = pd.concat([self.q_table, new_row])

In [9]:
import os

from pysc2 import maps
from pysc2.env import available_actions_printer
from pysc2.env import run_loop
from pysc2.env import sc2_env
from pysc2.lib import point_flag
from pysc2.lib import stopwatch

from absl import app
from absl import flags
from future.builtins import range

import random
import math

import numpy as np
import pandas as pd

from pysc2.agents import base_agent
from pysc2.lib import actions
from pysc2.lib import features

# Numeric ids of the PySC2 functions the agent issues.
_NO_OP = actions.FUNCTIONS.no_op.id
_SELECT_POINT = actions.FUNCTIONS.select_point.id
_BUILD_SUPPLY_DEPOT = actions.FUNCTIONS.Build_SupplyDepot_screen.id
_BUILD_BARRACKS = actions.FUNCTIONS.Build_Barracks_screen.id
_TRAIN_MARINE = actions.FUNCTIONS.Train_Marine_quick.id
_SELECT_ARMY = actions.FUNCTIONS.select_army.id
_ATTACK_MINIMAP = actions.FUNCTIONS.Attack_minimap.id

# Indices of feature layers, taken from the SCREEN feature set.
# NOTE(review): SmartAgent.step also uses _PLAYER_RELATIVE to index the
# 'feature_minimap' observation -- confirm the minimap layer has the same index.
_PLAYER_RELATIVE = features.SCREEN_FEATURES.player_relative.index
_UNIT_TYPE = features.SCREEN_FEATURES.unit_type.index
_PLAYER_ID = features.SCREEN_FEATURES.player_id.index

# Value compared against the player_relative layer to find the agent's own units.
_PLAYER_SELF = 1

# Values compared against the unit_type layer (presumably SC2 unit type ids).
_TERRAN_COMMANDCENTER = 18
_TERRAN_SCV = 45 
_TERRAN_SUPPLY_DEPOT = 19
_TERRAN_BARRACKS = 21

# Values passed as the `queued` argument of function calls.
_NOT_QUEUED = [0]
_QUEUED = [1]

# Value compared against the player_relative layer to find enemy units.
_PLAYER_HOSTILE = 4

# Reward added when the corresponding cumulative kill score has increased.
KILL_UNIT_REWARD = 0.2
KILL_BUILDING_REWARD = 0.5

# Debug store: SmartAgent.step appends every observation it receives here, so
# later cells can inspect them. It is never cleared by the code shown.
buffer = []

class SmartAgent(base_agent.BaseAgent):
    """Terran agent that picks high-level actions with tabular Q-learning.

    On every step the observation is condensed into a 20-element state
    vector, the Q-table is updated with a reward derived from the cumulative
    kill scores, and the chosen entry of `smart_actions` is translated into a
    PySC2 FunctionCall (falling back to no_op when it cannot be carried out).
    """

    def __init__(self):
        super(SmartAgent, self).__init__()

        self.qlearn = QLearningTable(actions=list(range(len(smart_actions))))

        # 1 when the agent's units sit in the upper half of the minimap,
        # 0 otherwise. Recomputed in every step(); None until the first one.
        self.base_top_left = None

        self._clear_episode_memory()

    def reset(self):
        """Start a new episode without carrying over the last transition."""
        super(SmartAgent, self).reset()
        # Without this, the kill-score baselines and the previous
        # state/action of the finished game would leak into the first update
        # of the next one. The learned Q-table itself is kept.
        self._clear_episode_memory()

    def _clear_episode_memory(self):
        """Forget the previous step's scores, state and action."""
        self.previous_killed_unit_score = 0
        self.previous_killed_building_score = 0

        self.previous_action = None
        self.previous_state = None

    def transformDistance(self, x, x_distance, y, y_distance):
        """Offset (x, y) by the given distances, mirrored when the base is not top-left."""
        if not self.base_top_left:
            return [x - x_distance, y - y_distance]

        return [x + x_distance, y + y_distance]

    def transformLocation(self, x, y):
        """Return [x, y], mirrored through 64 when the base is not top-left."""
        if not self.base_top_left:
            return [64 - x, 64 - y]

        return [x, y]

    @staticmethod
    def _locate(obs, unit_id):
        """Return (xs, ys) of the screen pixels whose unit_type equals `unit_id`."""
        unit_type = obs.observation['feature_screen'][_UNIT_TYPE]
        unit_y, unit_x = (unit_type == unit_id).nonzero()
        return unit_x, unit_y

    def _build_state(self, obs, minimap_relative):
        """Condense the observation into the 20-element state vector.

        Layout: [0] supply depot visible, [1] barracks visible,
        [2] and [3] entries 4 and 5 of the 'player' observation,
        [4:20] one flag per 16x16 minimap square that contains an enemy.
        """
        unit_type = obs.observation['feature_screen'][_UNIT_TYPE]

        current_state = np.zeros(20)
        # Test for the presence of matching pixels directly; calling .any()
        # on the coordinate arrays would miss units lying entirely in row 0.
        current_state[0] = 1 if (unit_type == _TERRAN_SUPPLY_DEPOT).any() else 0
        current_state[1] = 1 if (unit_type == _TERRAN_BARRACKS).any() else 0
        current_state[2] = obs.observation['player'][4]  # supply limit
        current_state[3] = obs.observation['player'][5]  # army supply

        hot_squares = np.zeros(16)
        enemy_y, enemy_x = (minimap_relative == _PLAYER_HOSTILE).nonzero()
        # Row-major index of the 16x16 square each enemy pixel falls into.
        hot_squares[(enemy_y // 16) * 4 + (enemy_x // 16)] = 1

        if not self.base_top_left:
            # Mirror the grid so states look the same from either start position.
            hot_squares = hot_squares[::-1]

        current_state[4:] = hot_squares
        return current_state

    def step(self, obs):
        """Learn from the previous transition and return the next FunctionCall."""
        super(SmartAgent, self).step(obs)

        # Kept for the notebook's inspection cells; grows by one entry per step.
        buffer.append(obs)

        # Look the layer up in the minimap feature set rather than reusing
        # the screen feature index on the minimap observation.
        minimap_relative = obs.observation['feature_minimap'][
            features.MINIMAP_FEATURES.player_relative.index]
        player_y, _ = (minimap_relative == _PLAYER_SELF).nonzero()
        # `.size` checks "any own pixel at all"; `.any()` would be False when
        # every own pixel is in row 0.
        self.base_top_left = 1 if player_y.size and player_y.mean() <= 31 else 0

        current_state = self._build_state(obs, minimap_relative)

        killed_unit_score = obs.observation['score_cumulative'][5]
        killed_building_score = obs.observation['score_cumulative'][6]

        if self.previous_action is not None:
            reward = 0

            if killed_unit_score > self.previous_killed_unit_score:
                reward += KILL_UNIT_REWARD

            if killed_building_score > self.previous_killed_building_score:
                reward += KILL_BUILDING_REWARD

            self.qlearn.learn(str(self.previous_state), self.previous_action, reward, str(current_state))

        rl_action = self.qlearn.choose_action(str(current_state))
        smart_action = smart_actions[rl_action]

        self.previous_killed_unit_score = killed_unit_score
        self.previous_killed_building_score = killed_building_score
        self.previous_state = current_state
        self.previous_action = rl_action

        # Attack actions are encoded as 'attack_<x>_<y>'.
        x = 0
        y = 0
        if '_' in smart_action:
            smart_action, x, y = smart_action.split('_')

        return self._to_function_call(obs, smart_action, x, y)

    def _to_function_call(self, obs, smart_action, x, y):
        """Translate a high-level action name into a FunctionCall.

        Returns a no_op whenever the action is ACTION_DO_NOTHING or cannot be
        carried out with the current observation.
        """
        if smart_action == ACTION_SELECT_SCV:
            unit_x, unit_y = self._locate(obs, _TERRAN_SCV)

            if unit_y.size:
                i = random.randint(0, len(unit_y) - 1)
                target = [unit_x[i], unit_y[i]]

                return actions.FunctionCall(_SELECT_POINT, [_NOT_QUEUED, target])

        elif smart_action == ACTION_BUILD_SUPPLY_DEPOT:
            if _BUILD_SUPPLY_DEPOT in obs.observation['available_actions']:
                unit_x, unit_y = self._locate(obs, _TERRAN_COMMANDCENTER)

                if unit_y.size:
                    target = self.transformDistance(int(unit_x.mean()), 0, int(unit_y.mean()), 20)

                    return actions.FunctionCall(_BUILD_SUPPLY_DEPOT, [_NOT_QUEUED, target])

        elif smart_action == ACTION_BUILD_BARRACKS:
            if _BUILD_BARRACKS in obs.observation['available_actions']:
                unit_x, unit_y = self._locate(obs, _TERRAN_COMMANDCENTER)

                if unit_y.size:
                    target = self.transformDistance(int(unit_x.mean()), 20, int(unit_y.mean()), 0)

                    return actions.FunctionCall(_BUILD_BARRACKS, [_NOT_QUEUED, target])

        elif smart_action == ACTION_SELECT_BARRACKS:
            unit_x, unit_y = self._locate(obs, _TERRAN_BARRACKS)

            if unit_y.size:
                target = [int(unit_x.mean()), int(unit_y.mean())]

                return actions.FunctionCall(_SELECT_POINT, [_NOT_QUEUED, target])

        elif smart_action == ACTION_BUILD_MARINE:
            if _TRAIN_MARINE in obs.observation['available_actions']:
                return actions.FunctionCall(_TRAIN_MARINE, [_QUEUED])

        elif smart_action == ACTION_SELECT_ARMY:
            if _SELECT_ARMY in obs.observation['available_actions']:
                return actions.FunctionCall(_SELECT_ARMY, [_NOT_QUEUED])

        elif smart_action == ACTION_ATTACK:
            # Only refuse to attack when the single selected unit is an SCV.
            # 'single_select' can be empty, and an empty array must not block
            # the attack -- it only means there is no single selection to
            # inspect.
            selected = obs.observation['single_select']
            scv_selected = selected.size > 0 and selected[0][0] == _TERRAN_SCV
            if not scv_selected and _ATTACK_MINIMAP in obs.observation['available_actions']:
                return actions.FunctionCall(_ATTACK_MINIMAP, [_NOT_QUEUED, self.transformLocation(int(x), int(y))])

        return actions.FunctionCall(_NO_OP, [])

In [10]:
from pysc2.lib import actions, features

def main(unused_argv):
    """Run SmartAgent (terran) on Simple64 against a very-easy random-race bot.

    The environment is created without visualisation and the agent is driven
    by run_loop using the max_agent_steps and max_episodes flags. Ctrl-C is
    swallowed so that the function returns quietly.
    """
    try:
        participants = [
            sc2_env.Agent(sc2_env.Race.terran),
            sc2_env.Bot(sc2_env.Race.random, sc2_env.Difficulty.very_easy),
        ]
        interface = features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=84, minimap=64))

        with sc2_env.SC2Env(map_name="Simple64",
                            players=participants,
                            agent_interface_format=interface,
                            step_mul=16,
                            game_steps_per_episode=0,
                            visualize=False) as env:
            run_loop.run_loop(
                [SmartAgent()], env, FLAGS.max_agent_steps, FLAGS.max_episodes)

    except KeyboardInterrupt:
        pass

# Set the StarCraft II install location, then start the SmartAgent run
# through absl's app.run.
# NOTE(review): an earlier cell sets SC2PATH to a different directory
# (/home/AI_core/StarCraftII) -- confirm which one is intended.
if __name__ == "__main__":
    os.environ["SC2PATH"] = "/home/nsml/StarCraftII"
    app.run(main)

I1211 08:43:18.633599 140168094603072 sc_process.py:135] Launching SC2: /home/nsml/StarCraftII/Versions/Base59877/SC2_x64 -listen 127.0.0.1 -port 20191 -dataDir /home/nsml/StarCraftII/ -tempDir /tmp/sc-9tbju_bf/
I1211 08:43:18.642589 140168094603072 remote_controller.py:167] Connecting to: ws://127.0.0.1:20191/sc2api, attempt: 0, running: True
I1211 08:43:19.646579 140168094603072 remote_controller.py:167] Connecting to: ws://127.0.0.1:20191/sc2api, attempt: 1, running: True
I1211 08:43:20.654164 140168094603072 remote_controller.py:167] Connecting to: ws://127.0.0.1:20191/sc2api, attempt: 2, running: True
I1211 08:43:27.635749 140168094603072 sc2_env.py:314] Environment is ready
I1211 08:43:27.646523 140168094603072 sc2_env.py:507] Starting episode 1: [terran, random] on Simple64
I1211 08:43:46.816362 140168094603072 sc2_env.py:725] Episode 1 finished after 12880 game steps. Outcome: [-1], reward: [-1], score: [5020]
I1211 08:43:50.858770 140168094603072 sc2_env.py:507] Starting episo

Took 71.914 seconds for 2516 steps: 34.986 fps


SystemExit: 

In [56]:
# Inspect the observations recorded in `buffer` by SmartAgent.step():
# how many were stored, how many fields the first one has, each field in
# turn, and the keys of its fourth field (the observation dict).
print(len(buffer))
print(len(buffer[0]))
for i in buffer[0]:
    print(i)
print(buffer[0][3].keys())
buffer[0]

1
4
StepType.FIRST
0.0
0.0
{'single_select': array([], shape=(0, 7), dtype=int32), 'multi_select': array([], shape=(0, 7), dtype=int32), 'build_queue': array([], shape=(0, 7), dtype=int32), 'cargo': array([], shape=(0, 7), dtype=int32), 'production_queue': array([], shape=(0, 2), dtype=int32), 'last_actions': array([], dtype=int32), 'cargo_slots_available': array([0], dtype=int32), 'home_race_requested': array([1], dtype=int32), 'away_race_requested': array([4], dtype=int32), 'map_name': 'AbyssalReef', 'feature_screen': NamedNumpyArray([[[ 40,  30,  26, ..., 210, 210, 210],
                  [ 35,  28,  24, ..., 210, 210, 210],
                  [ 33,  26,  22, ..., 210, 210, 210],
                  ...,
                  [210, 210, 210, ..., 255, 255, 255],
                  [210, 210, 210, ..., 255, 255, 255],
                  [210, 210, 210, ..., 255, 255, 255]],

                 [[  0,   0,   0, ...,   0,   0,   0],
                  [  0,   0,   0, ...,   0,   0,   0],
         

In [65]:
# Access the named fields of the first recorded timestep. Only the value of
# the last expression (the observation dict) is shown as the cell's output.
buffer[0].reward
buffer[0].discount
buffer[0].observation

{'single_select': array([], shape=(0, 7), dtype=int32),
 'multi_select': array([], shape=(0, 7), dtype=int32),
 'build_queue': array([], shape=(0, 7), dtype=int32),
 'cargo': array([], shape=(0, 7), dtype=int32),
 'production_queue': array([], shape=(0, 2), dtype=int32),
 'last_actions': array([], dtype=int32),
 'cargo_slots_available': array([0], dtype=int32),
 'home_race_requested': array([1], dtype=int32),
 'away_race_requested': array([4], dtype=int32),
 'map_name': 'AbyssalReef',
 'feature_screen': NamedNumpyArray([[[ 40,  30,  26, ..., 210, 210, 210],
                   [ 35,  28,  24, ..., 210, 210, 210],
                   [ 33,  26,  22, ..., 210, 210, 210],
                   ...,
                   [210, 210, 210, ..., 255, 255, 255],
                   [210, 210, 210, ..., 255, 255, 255],
                   [210, 210, 210, ..., 255, 255, 255]],
 
                  [[  0,   0,   0, ...,   0,   0,   0],
                   [  0,   0,   0, ...,   0,   0,   0],
                 

In [33]:
# 'single_select' is empty in the first recorded observation (size 0), which
# is why SmartAgent checks its size before indexing into it.
buffer[0].observation['single_select'].size

0