# STEP 2 - Making RL PySC2 Agent

In [1]:
%load_ext autoreload
%autoreload 2

## 0. Runnning 'Agent code' on jupyter notebook 

In [2]:
# unfortunately, PySC2 uses Abseil, which treats python code as if its run like an app
# This does not play well with jupyter notebook
# So we will need to monkeypatch sys.argv


import sys
#sys.argv = ["python", "--map", "AbyssalReef"]
sys.argv = ["python", "--map", "Simple64"]

# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Run an agent."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import importlib
import threading

from absl import app
from absl import flags
from future.builtins import range  # pylint: disable=redefined-builtin

from pysc2 import maps
from pysc2.env import available_actions_printer
from pysc2.env import run_loop
from pysc2.env import sc2_env
from pysc2.lib import point_flag
from pysc2.lib import stopwatch

FLAGS = flags.FLAGS

# because of Abseil's horrible design for running code underneath Colabs
# We have to pull out this ugly hack from the hat
if "flags_defined" not in globals():
    flags.DEFINE_bool("render", True, "Whether to render with pygame.")
    point_flag.DEFINE_point("feature_screen_size", "84",
                            "Resolution for screen feature layers.")
    point_flag.DEFINE_point("feature_minimap_size", "64",
                            "Resolution for minimap feature layers.")
    point_flag.DEFINE_point("rgb_screen_size", None,
                            "Resolution for rendered screen.")
    point_flag.DEFINE_point("rgb_minimap_size", None,
                            "Resolution for rendered minimap.")
    flags.DEFINE_enum("action_space", None, sc2_env.ActionSpace._member_names_,  # pylint: disable=protected-access
                      "Which action space to use. Needed if you take both feature "
                      "and rgb observations.")
    flags.DEFINE_bool("use_feature_units", True,
                      "Whether to include feature units.")
    flags.DEFINE_bool("disable_fog", False, "Whether to disable Fog of War.")

    flags.DEFINE_integer("max_agent_steps", 0, "Total agent steps.")
    flags.DEFINE_integer("game_steps_per_episode", None, "Game steps per episode.")
    flags.DEFINE_integer("max_episodes", 0, "Total episodes.")
    flags.DEFINE_integer("step_mul", 8, "Game steps per agent step.")
    flags.DEFINE_float("fps", 22.4, "Frames per second to run the game.")

    #flags.DEFINE_string("agent", "sc2.agent.BasicAgent.ZergBasicAgent",
    #                    "Which agent to run, as a python path to an Agent class.")
    #flags.DEFINE_enum("agent_race", "zerg", sc2_env.Race._member_names_,  # pylint: disable=protected-access
    #                  "Agent 1's race.")
    flags.DEFINE_string("agent", "TerranBasicAgent",
                        "Which agent to run, as a python path to an Agent class.")
    flags.DEFINE_enum("agent_race", "terran", sc2_env.Race._member_names_,  # pylint: disable=protected-access
                      "Agent 1's race.")

    flags.DEFINE_string("agent2", "Bot", "Second agent, either Bot or agent class.")
    flags.DEFINE_enum("agent2_race", "random", sc2_env.Race._member_names_,  # pylint: disable=protected-access
                      "Agent 2's race.")
    flags.DEFINE_enum("difficulty", "very_easy", sc2_env.Difficulty._member_names_,  # pylint: disable=protected-access
                      "If agent2 is a built-in Bot, it's strength.")

    flags.DEFINE_bool("profile", False, "Whether to turn on code profiling.")
    flags.DEFINE_bool("trace", False, "Whether to trace the code execution.")
    flags.DEFINE_integer("parallel", 1, "How many instances to run in parallel.")

    flags.DEFINE_bool("save_replay", True, "Whether to save a replay at the end.")

    flags.DEFINE_string("map", None, "Name of a map to use.")
    flags.mark_flag_as_required("map")

flags_defined = True

def run_thread(agent_classes, players, map_name, visualize):
  """Run one thread worth of the environment with agents."""
  with sc2_env.SC2Env(
      map_name=map_name,
      players=players,
      agent_interface_format=sc2_env.parse_agent_interface_format(
          feature_screen=FLAGS.feature_screen_size,
          feature_minimap=FLAGS.feature_minimap_size,
          rgb_screen=FLAGS.rgb_screen_size,
          rgb_minimap=FLAGS.rgb_minimap_size,
          action_space=FLAGS.action_space,
          use_feature_units=FLAGS.use_feature_units),
      step_mul=FLAGS.step_mul,
      game_steps_per_episode=FLAGS.game_steps_per_episode,
      disable_fog=FLAGS.disable_fog,
      visualize=visualize) as env:
    env = available_actions_printer.AvailableActionsPrinter(env)
    agents = [agent_cls() for agent_cls in agent_classes]
    run_loop.run_loop(agents, env, FLAGS.max_agent_steps, FLAGS.max_episodes)
    if FLAGS.save_replay:
      env.save_replay(agent_classes[0].__name__)

def main(unused_argv):
  """Run an agent."""
  #stopwatch.sw.enabled = FLAGS.profile or FLAGS.trace
  #stopwatch.sw.trace = FLAGS.trace

  map_inst = maps.get(FLAGS.map)

  agent_classes = []
  players = []

  #agent_module, agent_name = FLAGS.agent.rsplit(".", 1)
  #agent_cls = getattr(importlib.import_module(agent_module), agent_name)
  #agent_classes.append(agent_cls)
  agent_classes.append(TerranRLAgent)
  players.append(sc2_env.Agent(sc2_env.Race[FLAGS.agent_race]))

  if map_inst.players >= 2:
    if FLAGS.agent2 == "Bot":
      players.append(sc2_env.Bot(sc2_env.Race[FLAGS.agent2_race],
                                 sc2_env.Difficulty[FLAGS.difficulty]))
    else:
      agent_module, agent_name = FLAGS.agent2.rsplit(".", 1)
      agent_cls = getattr(importlib.import_module(agent_module), agent_name)
      agent_classes.append(agent_cls)
      players.append(sc2_env.Agent(sc2_env.Race[FLAGS.agent2_race]))

  threads = []
  for _ in range(FLAGS.parallel - 1):
    t = threading.Thread(target=run_thread,
                         args=(agent_classes, players, FLAGS.map, False))
    threads.append(t)
    t.start()

  run_thread(agent_classes, players, FLAGS.map, FLAGS.render)

  for t in threads:
    t.join()

  if FLAGS.profile:
    pass
    #print(stopwatch.sw)

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


## 1. Creating a RL PySC2 Agent

In [3]:
import random
import time
import math

import numpy as np
import pandas as pd


from pysc2.agents import base_agent
from pysc2.env import sc2_env
from pysc2.lib import actions, features, units
from absl import app

In [4]:
# reference from https://github.com/MorvanZhou/Reinforcement-learning-with-tensorflow
class QLearningTable:
    def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
        self.actions = actions  # a list
        self.lr = learning_rate
        self.gamma = reward_decay
        self.epsilon = e_greedy
        self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)

    def choose_action(self, observation):
        self.check_state_exist(observation)
        
        if np.random.uniform() < self.epsilon:
            # choose best action
            # state_action = self.q_table.ix[observation, :]
            state_action = self.q_table.loc[observation, self.q_table.columns[:]]
            
            # some actions have the same value
            state_action = state_action.reindex(np.random.permutation(state_action.index))
            
            action = state_action.idxmax()
        else:
            # choose random action
            action = np.random.choice(self.actions)
            
        return action

    def learn(self, s, a, r, s_):
        self.check_state_exist(s_)
        self.check_state_exist(s)
        
        #q_predict = self.q_table.ix[s, a]
        q_predict = self.q_table.loc[s, a]
        #q_target = r + self.gamma * self.q_table.ix[s_, :].max()
        q_target = r + self.gamma * self.q_table.loc[s_, self.q_table.columns[:]].max()
        
        # update
        #self.q_table.ix[s, a] += self.lr * (q_target - q_predict)
        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)

    def check_state_exist(self, state):
        if state not in self.q_table.index:
            # append new state to q table
            self.q_table = self.q_table.append(pd.Series([0] * len(self.actions), index=self.q_table.columns, name=state))

In [6]:
class TerranRLAgent(base_agent.BaseAgent):
    def __init__(self):
        super(TerranRLAgent, self).__init__()

        self.base_top_left = None

    def transformLocation(self, x, x_distance, y, y_distance):
        if not self.base_top_left:
            return [x - x_distance, y - y_distance]
        
        return [x + x_distance, y + y_distance]
        
    def step(self, obs):
        super(TerranRLAgent, self).step(obs)

        time.sleep(0.5)
        
        if self.base_top_left is None:
            player_y, player_x = (obs.observation.feature_minimap.player_relative == features.PlayerRelative.SELF).nonzero()
            self.base_top_left = player_y.mean() <= 31

        return actions.FUNCTIONS.no_op()

### [run code]

In [7]:
if __name__ == "__main__":
  app.run(main)

I0624 17:04:47.196283 4397297088 sc_process.py:135] Launching SC2: /Applications/StarCraft II/Versions/Base80188/SC2.app/Contents/MacOS/SC2 -listen 127.0.0.1 -port 15235 -dataDir /Applications/StarCraft II/ -tempDir /var/folders/kl/h0d5qxj551x0d2y091w17l1h0000gn/T/sc-76bz22jy/ -displayMode 0 -windowwidth 640 -windowheight 480 -windowx 50 -windowy 50
I0624 17:04:47.203261 4397297088 remote_controller.py:166] Connecting to: ws://127.0.0.1:15235/sc2api, attempt: 0, running: True
I0624 17:04:48.222778 4397297088 remote_controller.py:166] Connecting to: ws://127.0.0.1:15235/sc2api, attempt: 1, running: True
I0624 17:04:49.227516 4397297088 remote_controller.py:166] Connecting to: ws://127.0.0.1:15235/sc2api, attempt: 2, running: True
I0624 17:04:50.234404 4397297088 remote_controller.py:166] Connecting to: ws://127.0.0.1:15235/sc2api, attempt: 3, running: True
I0624 17:04:51.238704 4397297088 remote_controller.py:166] Connecting to: ws://127.0.0.1:15235/sc2api, attempt: 4, running: True
I06

   0/no_op                                              ()
   1/move_camera                                        (1/minimap [64, 64])
   2/select_point                                       (6/select_point_act [4]; 0/screen [84, 84])
   3/select_rect                                        (7/select_add [2]; 0/screen [84, 84]; 2/screen2 [84, 84])
   4/select_control_group                               (4/control_group_act [5]; 5/control_group_id [10])
Took 26.501 seconds for 50 steps: 1.887 fps


I0624 17:05:45.059388 4397297088 sc2_env.py:752] Environment Close
I0624 17:05:45.062393 4397297088 sc_process.py:232] Shutdown gracefully.
I0624 17:05:45.063596 4397297088 sc_process.py:210] Shutdown with return code: 1


ConnectionError: Error during save_replay: Socket error: [Errno 54] Connection reset by peer

## 2. Defining Actions

In [12]:
import random
import time
import math

import numpy as np
import pandas as pd


from pysc2.agents import base_agent
from pysc2.env import sc2_env
from pysc2.lib import actions, features, units
from absl import app

In [13]:
ACTION_DO_NOTHING = 'donothing'
ACTION_SELECT_SCV = 'selectscv'
ACTION_BUILD_SUPPLY_DEPOT = 'buildsupplydepot'
ACTION_BUILD_BARRACKS = 'buildbarracks'
ACTION_SELECT_BARRACKS = 'selectbarracks'
ACTION_BUILD_MARINE = 'buildmarine'
ACTION_SELECT_ARMY = 'selectarmy'
ACTION_ATTACK = 'attack'

smart_actions = [
    ACTION_DO_NOTHING,
    ACTION_SELECT_SCV,
    ACTION_BUILD_SUPPLY_DEPOT,
    ACTION_BUILD_BARRACKS,
    ACTION_SELECT_BARRACKS,
    ACTION_BUILD_MARINE,
    ACTION_SELECT_ARMY,
    ACTION_ATTACK,
]

In [14]:
# reference from https://github.com/MorvanZhou/Reinforcement-learning-with-tensorflow
class QLearningTable:
    def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
        self.actions = actions  # a list
        self.lr = learning_rate
        self.gamma = reward_decay
        self.epsilon = e_greedy
        self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)

    def choose_action(self, observation):
        self.check_state_exist(observation)
        
        if np.random.uniform() < self.epsilon:
            # choose best action
            # state_action = self.q_table.ix[observation, :]
            state_action = self.q_table.loc[observation, self.q_table.columns[:]]
            
            # some actions have the same value
            state_action = state_action.reindex(np.random.permutation(state_action.index))
            
            action = state_action.idxmax()
        else:
            # choose random action
            action = np.random.choice(self.actions)
            
        return action

    def learn(self, s, a, r, s_):
        self.check_state_exist(s_)
        self.check_state_exist(s)
        
        #q_predict = self.q_table.ix[s, a]
        q_predict = self.q_table.loc[s, a]
        #q_target = r + self.gamma * self.q_table.ix[s_, :].max()
        q_target = r + self.gamma * self.q_table.loc[s_, self.q_table.columns[:]].max()
        
        # update
        #self.q_table.ix[s, a] += self.lr * (q_target - q_predict)
        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)

    def check_state_exist(self, state):
        if state not in self.q_table.index:
            # append new state to q table
            self.q_table = self.q_table.append(pd.Series([0] * len(self.actions), index=self.q_table.columns, name=state))

In [15]:
class TerranRLAgent(base_agent.BaseAgent):
    def __init__(self):
        super(TerranRLAgent, self).__init__()

        self.base_top_left = None
        self.qlearn = QLearningTable(actions=list(range(len(smart_actions))))

    def transformLocation(self, x, x_distance, y, y_distance):
        if not self.base_top_left:
            return [x - x_distance, y - y_distance]
        
        return [x + x_distance, y + y_distance]

    def unit_type_is_selected(self, obs, unit_type):
        if (len(obs.observation.single_select) > 0 and
            obs.observation.single_select[0].unit_type == unit_type):
              return True

        if (len(obs.observation.multi_select) > 0 and
            obs.observation.multi_select[0].unit_type == unit_type):
              return True

        return False

    def get_units_by_type(self, obs, unit_type):
        return [unit for unit in obs.observation.feature_units
                if unit.unit_type == unit_type]

    def can_do(self, obs, action):
        return action in obs.observation.available_actions
    
    def step(self, obs):
        super(TerranRLAgent, self).step(obs)

        time.sleep(0.5)
        
        if self.base_top_left is None:
            player_y, player_x = (obs.observation.feature_minimap.player_relative == features.PlayerRelative.SELF).nonzero()
            self.base_top_left = player_y.mean() <= 31

        smart_action = smart_actions[random.randrange(0, len(smart_actions) - 1)]
        
        if smart_action == ACTION_DO_NOTHING:
            return actions.FUNCTIONS.no_op()

        elif smart_action == ACTION_SELECT_SCV:
            scvs = self.get_units_by_type(obs, units.Terran.SCV)
            if len(scvs) > 0:
                scv = random.choice(scvs)
                return actions.FUNCTIONS.select_point("select_all_type", (scv.x,
                                                                          scv.y))
        
        elif smart_action == ACTION_BUILD_SUPPLY_DEPOT:
            if self.can_do(obs, actions.FUNCTIONS.Build_SupplyDepot_screen.id):
                ccs = self.get_units_by_type(obs, units.Terran.CommandCenter)
                if len(ccs) > 0:
                    cc = random.choice(ccs)
                    target = self.transformLocation(cc.x, 0, cc.y, 20)

                    return actions.FUNCTIONS.Build_SupplyDepot_screen("now", target)        
        
        elif smart_action == ACTION_BUILD_BARRACKS:
            if self.can_do(obs, actions.FUNCTIONS.Build_Barracks_screen.id):
                ccs = self.get_units_by_type(obs, units.Terran.CommandCenter)
                if len(ccs) > 0:
                    cc = random.choice(ccs)
                    target = self.transformLocation(cc.x, 20, cc.y, 0)

                    return actions.FUNCTIONS.Build_Barracks_screen("now", target)
    
        elif smart_action == ACTION_SELECT_BARRACKS:
            barracks = self.get_units_by_type(obs, units.Terran.Barracks)
            if len(barracks) > 0:
                barrack = random.choice(barracks)
                return actions.FUNCTIONS.select_point("select_all_type", (barrack.x,
                                                                          barrack.y))
        
        elif smart_action == ACTION_BUILD_MARINE:
            if self.can_do(obs, actions.FUNCTIONS.Train_Marine_quick.id):
                return actions.FUNCTIONS.Train_Marine_quick("queued")
        
        elif smart_action == ACTION_SELECT_ARMY:
            if self.can_do(obs, actions.FUNCTIONS.select_army.id):
                return actions.FUNCTIONS.select_army("select")
        
        elif smart_action == ACTION_ATTACK:
            if self.can_do(obs, actions.FUNCTIONS.Attack_minimap.id):
                if self.base_top_left:
                    return actions.FUNCTIONS.Attack_minimap("now", [39, 45])
                else:
                    return actions.FUNCTIONS.Attack_minimap("now", [21, 24])
            
        return actions.FUNCTIONS.no_op()

### [run code]

In [16]:
if __name__ == "__main__":
  app.run(main)

I0624 19:28:27.491891 4397297088 sc_process.py:135] Launching SC2: /Applications/StarCraft II/Versions/Base80188/SC2.app/Contents/MacOS/SC2 -listen 127.0.0.1 -port 19306 -dataDir /Applications/StarCraft II/ -tempDir /var/folders/kl/h0d5qxj551x0d2y091w17l1h0000gn/T/sc-knd61k5d/ -displayMode 0 -windowwidth 640 -windowheight 480 -windowx 50 -windowy 50
I0624 19:28:27.520191 4397297088 remote_controller.py:166] Connecting to: ws://127.0.0.1:19306/sc2api, attempt: 0, running: True
I0624 19:28:28.528398 4397297088 remote_controller.py:166] Connecting to: ws://127.0.0.1:19306/sc2api, attempt: 1, running: True
I0624 19:28:29.533381 4397297088 remote_controller.py:166] Connecting to: ws://127.0.0.1:19306/sc2api, attempt: 2, running: True
I0624 19:28:30.539739 4397297088 remote_controller.py:166] Connecting to: ws://127.0.0.1:19306/sc2api, attempt: 3, running: True
I0624 19:28:31.546064 4397297088 remote_controller.py:166] Connecting to: ws://127.0.0.1:19306/sc2api, attempt: 4, running: True
I06

   0/no_op                                              ()
   1/move_camera                                        (1/minimap [64, 64])
   2/select_point                                       (6/select_point_act [4]; 0/screen [84, 84])
   3/select_rect                                        (7/select_add [2]; 0/screen [84, 84]; 2/screen2 [84, 84])
   4/select_control_group                               (4/control_group_act [5]; 5/control_group_id [10])
   5/select_unit                                        (8/select_unit_act [4]; 9/select_unit_id [500])
 264/Harvest_Gather_screen                              (3/queued [2]; 0/screen [84, 84])
  12/Attack_screen                                      (3/queued [2]; 0/screen [84, 84])
  13/Attack_minimap                                     (3/queued [2]; 1/minimap [64, 64])
 269/Harvest_Return_quick                               (3/queued [2])
 274/HoldPosition_quick                                 (3/queued [2])
 549/Effect_Spray_minimap 

I0624 19:29:53.922787 4397297088 sc2_env.py:752] Environment Close


Took 52.866 seconds for 100 steps: 1.892 fps


I0624 19:29:54.338735 4397297088 sc_process.py:232] Shutdown gracefully.
I0624 19:29:54.340004 4397297088 sc_process.py:210] Shutdown with return code: -15


ValueError: Argument is out of range for 42/Build_Barracks_screen (3/queued [2]; 0/screen [0, 0]), got: [[<Queued.now: 0>], [-5, 22]]

## 3. Defining States

In [3]:
import random
import time
import math

import numpy as np
import pandas as pd


from pysc2.agents import base_agent
from pysc2.env import sc2_env
from pysc2.lib import actions, features, units
from absl import app

In [4]:
ACTION_DO_NOTHING = 'donothing'
ACTION_SELECT_SCV = 'selectscv'
ACTION_BUILD_SUPPLY_DEPOT = 'buildsupplydepot'
ACTION_BUILD_BARRACKS = 'buildbarracks'
ACTION_SELECT_BARRACKS = 'selectbarracks'
ACTION_BUILD_MARINE = 'buildmarine'
ACTION_SELECT_ARMY = 'selectarmy'
ACTION_ATTACK = 'attack'

smart_actions = [
    ACTION_DO_NOTHING,
    ACTION_SELECT_SCV,
    ACTION_BUILD_SUPPLY_DEPOT,
    ACTION_BUILD_BARRACKS,
    ACTION_SELECT_BARRACKS,
    ACTION_BUILD_MARINE,
    ACTION_SELECT_ARMY,
    ACTION_ATTACK,
]

In [11]:
# reference from https://github.com/MorvanZhou/Reinforcement-learning-with-tensorflow
class QLearningTable:
    def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
        self.actions = actions  # a list
        self.lr = learning_rate
        self.gamma = reward_decay
        self.epsilon = e_greedy
        self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)

    def choose_action(self, observation):
        self.check_state_exist(observation)
        
        if np.random.uniform() < self.epsilon:
            # choose best action
            # state_action = self.q_table.ix[observation, :]
            state_action = self.q_table.loc[observation, self.q_table.columns[:]]
            
            # some actions have the same value
            state_action = state_action.reindex(np.random.permutation(state_action.index))
            
            action = state_action.idxmax()
        else:
            # choose random action
            action = np.random.choice(self.actions)
            
        return action

    def learn(self, s, a, r, s_):
        self.check_state_exist(s_)
        self.check_state_exist(s)
        
        #q_predict = self.q_table.ix[s, a]
        q_predict = self.q_table.loc[s, a]
        #q_target = r + self.gamma * self.q_table.ix[s_, :].max()
        q_target = r + self.gamma * self.q_table.loc[s_, self.q_table.columns[:]].max()
        
        # update
        #self.q_table.ix[s, a] += self.lr * (q_target - q_predict)
        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)

    def check_state_exist(self, state):
        if state not in self.q_table.index:
            # append new state to q table
            self.q_table = self.q_table.append(pd.Series([0] * len(self.actions), index=self.q_table.columns, name=state))

In [12]:
class TerranRLAgent(base_agent.BaseAgent):
    def __init__(self):
        super(TerranRLAgent, self).__init__()

        self.base_top_left = None
        self.qlearn = QLearningTable(actions=list(range(len(smart_actions))))

    def transformLocation(self, x, x_distance, y, y_distance):
        if not self.base_top_left:
            return [x - x_distance, y - y_distance]
        
        return [x + x_distance, y + y_distance]

    def unit_type_is_selected(self, obs, unit_type):
        if (len(obs.observation.single_select) > 0 and
            obs.observation.single_select[0].unit_type == unit_type):
              return True

        if (len(obs.observation.multi_select) > 0 and
            obs.observation.multi_select[0].unit_type == unit_type):
              return True

        return False

    def get_units_by_type(self, obs, unit_type):
        return [unit for unit in obs.observation.feature_units
                if unit.unit_type == unit_type]

    def can_do(self, obs, action):
        return action in obs.observation.available_actions
    
    def step(self, obs):
        super(TerranRLAgent, self).step(obs)

        time.sleep(0.5)
        
        if self.base_top_left is None:
            player_y, player_x = (obs.observation.feature_minimap.player_relative == features.PlayerRelative.SELF).nonzero()
            self.base_top_left = player_y.mean() <= 31

        supply_depot_count = len(self.get_units_by_type(obs, units.Terran.SupplyDepot))

        barracks_count = len(self.get_units_by_type(obs, units.Terran.Barracks))
            
        supply_limit = obs.observation.player.food_cap
        army_supply = obs.observation.player.food_used
        
        current_state = [
            supply_depot_count,
            barracks_count,
            supply_limit,
            army_supply,
        ]
        
        rl_action = self.qlearn.choose_action(str(current_state))
        smart_action = smart_actions[rl_action]
        
        if smart_action == ACTION_DO_NOTHING:
            return actions.FUNCTIONS.no_op()

        elif smart_action == ACTION_SELECT_SCV:
            scvs = self.get_units_by_type(obs, units.Terran.SCV)
            if len(scvs) > 0:
                scv = random.choice(scvs)
                return actions.FUNCTIONS.select_point("select_all_type", (scv.x,
                                                                          scv.y))
        
        elif smart_action == ACTION_BUILD_SUPPLY_DEPOT:
            if self.can_do(obs, actions.FUNCTIONS.Build_SupplyDepot_screen.id):
                ccs = self.get_units_by_type(obs, units.Terran.CommandCenter)
                if len(ccs) > 0:
                    cc = random.choice(ccs)
                    target = self.transformLocation(cc.x, 0, cc.y, 20)

                    return actions.FUNCTIONS.Build_SupplyDepot_screen("now", target)        
        
        elif smart_action == ACTION_BUILD_BARRACKS:
            if self.can_do(obs, actions.FUNCTIONS.Build_Barracks_screen.id):
                ccs = self.get_units_by_type(obs, units.Terran.CommandCenter)
                if len(ccs) > 0:
                    cc = random.choice(ccs)
                    target = self.transformLocation(cc.x, 20, cc.y, 0)

                    return actions.FUNCTIONS.Build_Barracks_screen("now", target)
    
        elif smart_action == ACTION_SELECT_BARRACKS:
            barracks = self.get_units_by_type(obs, units.Terran.Barracks)
            if len(barracks) > 0:
                barrack = random.choice(barracks)
                return actions.FUNCTIONS.select_point("select_all_type", (barrack.x,
                                                                          barrack.y))
        
        elif smart_action == ACTION_BUILD_MARINE:
            if self.can_do(obs, actions.FUNCTIONS.Train_Marine_quick.id):
                return actions.FUNCTIONS.Train_Marine_quick("queued")
        
        elif smart_action == ACTION_SELECT_ARMY:
            if self.can_do(obs, actions.FUNCTIONS.select_army.id):
                return actions.FUNCTIONS.select_army("select")
        
        elif smart_action == ACTION_ATTACK:
            if self.can_do(obs, actions.FUNCTIONS.Attack_minimap.id):
                if self.base_top_left:
                    return actions.FUNCTIONS.Attack_minimap("now", [39, 45])
                else:
                    return actions.FUNCTIONS.Attack_minimap("now", [21, 24])
            
        return actions.FUNCTIONS.no_op()

### [run code]

In [14]:
if __name__ == "__main__":
  app.run(main)

I0624 20:17:02.806581 4443352512 sc_process.py:135] Launching SC2: /Applications/StarCraft II/Versions/Base80188/SC2.app/Contents/MacOS/SC2 -listen 127.0.0.1 -port 24794 -dataDir /Applications/StarCraft II/ -tempDir /var/folders/kl/h0d5qxj551x0d2y091w17l1h0000gn/T/sc-bvkm2j2x/ -displayMode 0 -windowwidth 640 -windowheight 480 -windowx 50 -windowy 50
I0624 20:17:02.900403 4443352512 remote_controller.py:166] Connecting to: ws://127.0.0.1:24794/sc2api, attempt: 0, running: True
I0624 20:17:03.908482 4443352512 remote_controller.py:166] Connecting to: ws://127.0.0.1:24794/sc2api, attempt: 1, running: True
I0624 20:17:04.911352 4443352512 remote_controller.py:166] Connecting to: ws://127.0.0.1:24794/sc2api, attempt: 2, running: True
I0624 20:17:05.915674 4443352512 remote_controller.py:166] Connecting to: ws://127.0.0.1:24794/sc2api, attempt: 3, running: True
I0624 20:17:06.919007 4443352512 remote_controller.py:166] Connecting to: ws://127.0.0.1:24794/sc2api, attempt: 4, running: True
I06

   0/no_op                                              ()
   1/move_camera                                        (1/minimap [64, 64])
   2/select_point                                       (6/select_point_act [4]; 0/screen [84, 84])
   3/select_rect                                        (7/select_add [2]; 0/screen [84, 84]; 2/screen2 [84, 84])
   4/select_control_group                               (4/control_group_act [5]; 5/control_group_id [10])
   5/select_unit                                        (8/select_unit_act [4]; 9/select_unit_id [500])
 264/Harvest_Gather_screen                              (3/queued [2]; 0/screen [84, 84])
  12/Attack_screen                                      (3/queued [2]; 0/screen [84, 84])
  13/Attack_minimap                                     (3/queued [2]; 1/minimap [64, 64])
 269/Harvest_Return_quick                               (3/queued [2])
 274/HoldPosition_quick                                 (3/queued [2])
 549/Effect_Spray_minimap 

I0624 20:19:12.081377 123145664552960 sc2_env.py:752] Environment Close
I0624 20:19:12.082440 123145664552960 sc2_env.py:752] Environment Close
I0624 20:19:12.083066 123145664552960 sc2_env.py:752] Environment Close
I0624 20:19:12.083782 123145664552960 sc2_env.py:752] Environment Close


Took 119.243 seconds for 226 steps: 1.895 fps


I0624 20:19:31.302324 4443352512 sc2_env.py:752] Environment Close
I0624 20:19:31.305615 4443352512 sc_process.py:232] Shutdown gracefully.
I0624 20:19:31.306792 4443352512 sc_process.py:210] Shutdown with return code: 1


ConnectionError: Error during save_replay: Socket error: [Errno 54] Connection reset by peer

## 4. Defining Rewards

In [15]:
import random
import time
import math

import numpy as np
import pandas as pd


from pysc2.agents import base_agent
from pysc2.env import sc2_env
from pysc2.lib import actions, features, units
from absl import app

In [16]:
ACTION_DO_NOTHING = 'donothing'
ACTION_SELECT_SCV = 'selectscv'
ACTION_BUILD_SUPPLY_DEPOT = 'buildsupplydepot'
ACTION_BUILD_BARRACKS = 'buildbarracks'
ACTION_SELECT_BARRACKS = 'selectbarracks'
ACTION_BUILD_MARINE = 'buildmarine'
ACTION_SELECT_ARMY = 'selectarmy'
ACTION_ATTACK = 'attack'

smart_actions = [
    ACTION_DO_NOTHING,
    ACTION_SELECT_SCV,
    ACTION_BUILD_SUPPLY_DEPOT,
    ACTION_BUILD_BARRACKS,
    ACTION_SELECT_BARRACKS,
    ACTION_BUILD_MARINE,
    ACTION_SELECT_ARMY,
    ACTION_ATTACK,
]

KILL_UNIT_REWARD = 0.2
KILL_BUILDING_REWARD = 0.5

In [18]:
# reference from https://github.com/MorvanZhou/Reinforcement-learning-with-tensorflow
class QLearningTable:
    def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
        self.actions = actions  # a list
        self.lr = learning_rate
        self.gamma = reward_decay
        self.epsilon = e_greedy
        self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)

    def choose_action(self, observation):
        self.check_state_exist(observation)
        
        if np.random.uniform() < self.epsilon:
            # choose best action
            # state_action = self.q_table.ix[observation, :]
            state_action = self.q_table.loc[observation, self.q_table.columns[:]]
            
            # some actions have the same value
            state_action = state_action.reindex(np.random.permutation(state_action.index))
            
            action = state_action.idxmax()
        else:
            # choose random action
            action = np.random.choice(self.actions)
            
        return action

    def learn(self, s, a, r, s_):
        self.check_state_exist(s_)
        self.check_state_exist(s)
        
        #q_predict = self.q_table.ix[s, a]
        q_predict = self.q_table.loc[s, a]
        #q_target = r + self.gamma * self.q_table.ix[s_, :].max()
        q_target = r + self.gamma * self.q_table.loc[s_, self.q_table.columns[:]].max()
        
        # update
        #self.q_table.ix[s, a] += self.lr * (q_target - q_predict)
        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)

    def check_state_exist(self, state):
        if state not in self.q_table.index:
            # append new state to q table
            self.q_table = self.q_table.append(pd.Series([0] * len(self.actions), index=self.q_table.columns, name=state))

In [19]:
class TerranRLAgent(base_agent.BaseAgent):
    def __init__(self):
        super(TerranRLAgent, self).__init__()

        self.base_top_left = None
        self.qlearn = QLearningTable(actions=list(range(len(smart_actions))))
        
        self.previous_killed_unit_score = 0
        self.previous_killed_building_score = 0

    def transformLocation(self, x, x_distance, y, y_distance):
        if not self.base_top_left:
            return [x - x_distance, y - y_distance]
        
        return [x + x_distance, y + y_distance]

    def unit_type_is_selected(self, obs, unit_type):
        if (len(obs.observation.single_select) > 0 and
            obs.observation.single_select[0].unit_type == unit_type):
              return True

        if (len(obs.observation.multi_select) > 0 and
            obs.observation.multi_select[0].unit_type == unit_type):
              return True

        return False

    def get_units_by_type(self, obs, unit_type):
        return [unit for unit in obs.observation.feature_units
                if unit.unit_type == unit_type]

    def can_do(self, obs, action):
        return action in obs.observation.available_actions
    
    def step(self, obs):
        super(TerranRLAgent, self).step(obs)

        time.sleep(0.5)
        
        if self.base_top_left is None:
            player_y, player_x = (obs.observation.feature_minimap.player_relative == features.PlayerRelative.SELF).nonzero()
            self.base_top_left = player_y.mean() <= 31

        supply_depot_count = len(self.get_units_by_type(obs, units.Terran.SupplyDepot))

        barracks_count = len(self.get_units_by_type(obs, units.Terran.Barracks))
            
        supply_limit = obs.observation.player.food_cap
        army_supply = obs.observation.player.food_used
        
        killed_unit_score = obs.observation['score_cumulative'][5]
        killed_building_score = obs.observation['score_cumulative'][6]
        
        current_state = [
            supply_depot_count,
            barracks_count,
            supply_limit,
            army_supply,
        ]
        
        reward = 0
        
        if killed_unit_score > self.previous_killed_unit_score:
            reward += KILL_UNIT_REWARD
                
        if killed_building_score > self.previous_killed_building_score:
            reward += KILL_BUILDING_REWARD
                
        rl_action = self.qlearn.choose_action(str(current_state))
        smart_action = smart_actions[rl_action]
        
        self.previous_killed_unit_score = killed_unit_score
        self.previous_killed_building_score = killed_building_score
        
        if smart_action == ACTION_DO_NOTHING:
            return actions.FUNCTIONS.no_op()

        elif smart_action == ACTION_SELECT_SCV:
            scvs = self.get_units_by_type(obs, units.Terran.SCV)
            if len(scvs) > 0:
                scv = random.choice(scvs)
                return actions.FUNCTIONS.select_point("select_all_type", (scv.x,
                                                                          scv.y))
        
        elif smart_action == ACTION_BUILD_SUPPLY_DEPOT:
            if self.can_do(obs, actions.FUNCTIONS.Build_SupplyDepot_screen.id):
                ccs = self.get_units_by_type(obs, units.Terran.CommandCenter)
                if len(ccs) > 0:
                    cc = random.choice(ccs)
                    target = self.transformLocation(cc.x, 0, cc.y, 20)

                    return actions.FUNCTIONS.Build_SupplyDepot_screen("now", target)        
        
        elif smart_action == ACTION_BUILD_BARRACKS:
            if self.can_do(obs, actions.FUNCTIONS.Build_Barracks_screen.id):
                ccs = self.get_units_by_type(obs, units.Terran.CommandCenter)
                if len(ccs) > 0:
                    cc = random.choice(ccs)
                    target = self.transformLocation(cc.x, 20, cc.y, 0)

                    return actions.FUNCTIONS.Build_Barracks_screen("now", target)
    
        elif smart_action == ACTION_SELECT_BARRACKS:
            barracks = self.get_units_by_type(obs, units.Terran.Barracks)
            if len(barracks) > 0:
                barrack = random.choice(barracks)
                return actions.FUNCTIONS.select_point("select_all_type", (barrack.x,
                                                                          barrack.y))
        
        elif smart_action == ACTION_BUILD_MARINE:
            if self.can_do(obs, actions.FUNCTIONS.Train_Marine_quick.id):
                return actions.FUNCTIONS.Train_Marine_quick("queued")
        
        elif smart_action == ACTION_SELECT_ARMY:
            if self.can_do(obs, actions.FUNCTIONS.select_army.id):
                return actions.FUNCTIONS.select_army("select")
        
        elif smart_action == ACTION_ATTACK:
            if self.can_do(obs, actions.FUNCTIONS.Attack_minimap.id):
                if self.base_top_left:
                    return actions.FUNCTIONS.Attack_minimap("now", [39, 45])
                else:
                    return actions.FUNCTIONS.Attack_minimap("now", [21, 24])
            
        return actions.FUNCTIONS.no_op()

### [run code]

In [20]:
if __name__ == "__main__":
  app.run(main)

I0624 21:03:10.317957 4443352512 sc_process.py:135] Launching SC2: /Applications/StarCraft II/Versions/Base80188/SC2.app/Contents/MacOS/SC2 -listen 127.0.0.1 -port 20942 -dataDir /Applications/StarCraft II/ -tempDir /var/folders/kl/h0d5qxj551x0d2y091w17l1h0000gn/T/sc-r942z9hi/ -displayMode 0 -windowwidth 640 -windowheight 480 -windowx 50 -windowy 50
I0624 21:03:10.357984 4443352512 remote_controller.py:166] Connecting to: ws://127.0.0.1:20942/sc2api, attempt: 0, running: True
I0624 21:03:11.366072 4443352512 remote_controller.py:166] Connecting to: ws://127.0.0.1:20942/sc2api, attempt: 1, running: True
I0624 21:03:12.371603 4443352512 remote_controller.py:166] Connecting to: ws://127.0.0.1:20942/sc2api, attempt: 2, running: True
I0624 21:03:13.378107 4443352512 remote_controller.py:166] Connecting to: ws://127.0.0.1:20942/sc2api, attempt: 3, running: True
I0624 21:03:14.384209 4443352512 remote_controller.py:166] Connecting to: ws://127.0.0.1:20942/sc2api, attempt: 4, running: True
I06

   0/no_op                                              ()
   1/move_camera                                        (1/minimap [64, 64])
   2/select_point                                       (6/select_point_act [4]; 0/screen [84, 84])
   3/select_rect                                        (7/select_add [2]; 0/screen [84, 84]; 2/screen2 [84, 84])
   4/select_control_group                               (4/control_group_act [5]; 5/control_group_id [10])
   5/select_unit                                        (8/select_unit_act [4]; 9/select_unit_id [500])
 264/Harvest_Gather_screen                              (3/queued [2]; 0/screen [84, 84])
  12/Attack_screen                                      (3/queued [2]; 0/screen [84, 84])
  13/Attack_minimap                                     (3/queued [2]; 1/minimap [64, 64])
 274/HoldPosition_quick                                 (3/queued [2])
 549/Effect_Spray_minimap                               (3/queued [2]; 1/minimap [64, 64])
 451/S

I0624 21:06:27.910899 4443352512 sc2_env.py:752] Environment Close
I0624 21:06:27.913089 4443352512 sc_process.py:232] Shutdown gracefully.
I0624 21:06:27.913661 4443352512 sc_process.py:210] Shutdown with return code: 1


ConnectionError: Error during save_replay: Socket error: [Errno 54] Connection reset by peer

## 5. Connecting All Up

In [3]:
import random
import time
import math

import numpy as np
import pandas as pd


from pysc2.agents import base_agent
from pysc2.env import sc2_env
from pysc2.lib import actions, features, units
from absl import app

In [4]:
ACTION_DO_NOTHING = 'donothing'
ACTION_SELECT_SCV = 'selectscv'
ACTION_BUILD_SUPPLY_DEPOT = 'buildsupplydepot'
ACTION_BUILD_BARRACKS = 'buildbarracks'
ACTION_SELECT_BARRACKS = 'selectbarracks'
ACTION_BUILD_MARINE = 'buildmarine'
ACTION_SELECT_ARMY = 'selectarmy'
ACTION_ATTACK = 'attack'

smart_actions = [
    ACTION_DO_NOTHING,
    ACTION_SELECT_SCV,
    ACTION_BUILD_SUPPLY_DEPOT,
    ACTION_BUILD_BARRACKS,
    ACTION_SELECT_BARRACKS,
    ACTION_BUILD_MARINE,
    ACTION_SELECT_ARMY,
    ACTION_ATTACK,
]

KILL_UNIT_REWARD = 0.2
KILL_BUILDING_REWARD = 0.5

In [5]:
# reference from https://github.com/MorvanZhou/Reinforcement-learning-with-tensorflow
class QLearningTable:
    def __init__(self, actions, learning_rate=0.01, reward_decay=0.9, e_greedy=0.9):
        self.actions = actions  # a list
        self.lr = learning_rate
        self.gamma = reward_decay
        self.epsilon = e_greedy
        self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)

    def choose_action(self, observation):
        self.check_state_exist(observation)
        
        if np.random.uniform() < self.epsilon:
            # choose best action
            # state_action = self.q_table.ix[observation, :]
            state_action = self.q_table.loc[observation, self.q_table.columns[:]]
            
            # some actions have the same value
            state_action = state_action.reindex(np.random.permutation(state_action.index))
            
            action = state_action.idxmax()
        else:
            # choose random action
            action = np.random.choice(self.actions)
            
        return action

    def learn(self, s, a, r, s_):
        self.check_state_exist(s_)
        self.check_state_exist(s)
        
        #q_predict = self.q_table.ix[s, a]
        q_predict = self.q_table.loc[s, a]
        #q_target = r + self.gamma * self.q_table.ix[s_, :].max()
        q_target = r + self.gamma * self.q_table.loc[s_, self.q_table.columns[:]].max()
        
        # update
        #self.q_table.ix[s, a] += self.lr * (q_target - q_predict)
        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)

    def check_state_exist(self, state):
        if state not in self.q_table.index:
            # append new state to q table
            self.q_table = self.q_table.append(pd.Series([0] * len(self.actions), index=self.q_table.columns, name=state))

In [6]:
class TerranRLAgent(base_agent.BaseAgent):
    def __init__(self):
        super(TerranRLAgent, self).__init__()

        self.base_top_left = None
        self.qlearn = QLearningTable(actions=list(range(len(smart_actions))))
        
        self.previous_killed_unit_score = 0
        self.previous_killed_building_score = 0
        
        self.previous_action = None
        self.previous_state = None

    def transformLocation(self, x, x_distance, y, y_distance):
        if not self.base_top_left:
            return [x - x_distance, y - y_distance]
        
        return [x + x_distance, y + y_distance]

    def unit_type_is_selected(self, obs, unit_type):
        if (len(obs.observation.single_select) > 0 and
            obs.observation.single_select[0].unit_type == unit_type):
              return True

        if (len(obs.observation.multi_select) > 0 and
            obs.observation.multi_select[0].unit_type == unit_type):
              return True

        return False

    def get_units_by_type(self, obs, unit_type):
        return [unit for unit in obs.observation.feature_units
                if unit.unit_type == unit_type]

    def can_do(self, obs, action):
        return action in obs.observation.available_actions
    
    def step(self, obs):
        super(TerranRLAgent, self).step(obs)

        #time.sleep(0.5)
        
        if self.base_top_left is None:
            player_y, player_x = (obs.observation.feature_minimap.player_relative == features.PlayerRelative.SELF).nonzero()
            self.base_top_left = player_y.mean() <= 31

        supply_depot_count = len(self.get_units_by_type(obs, units.Terran.SupplyDepot))

        barracks_count = len(self.get_units_by_type(obs, units.Terran.Barracks))
            
        supply_limit = obs.observation.player.food_cap
        army_supply = obs.observation.player.food_used
        
        killed_unit_score = obs.observation['score_cumulative'][5]
        killed_building_score = obs.observation['score_cumulative'][6]
        
        current_state = [
            supply_depot_count,
            barracks_count,
            supply_limit,
            army_supply,
        ]
        
        if self.previous_action is not None:
            reward = 0
                
            if killed_unit_score > self.previous_killed_unit_score:
                reward += KILL_UNIT_REWARD
                    
            if killed_building_score > self.previous_killed_building_score:
                reward += KILL_BUILDING_REWARD
                
            self.qlearn.learn(str(self.previous_state), self.previous_action, reward, str(current_state))
        
        rl_action = self.qlearn.choose_action(str(current_state))
        smart_action = smart_actions[rl_action]
        
        self.previous_killed_unit_score = killed_unit_score
        self.previous_killed_building_score = killed_building_score
        self.previous_state = current_state
        self.previous_action = rl_action
        
        if smart_action == ACTION_DO_NOTHING:
            return actions.FUNCTIONS.no_op()

        elif smart_action == ACTION_SELECT_SCV:
            scvs = self.get_units_by_type(obs, units.Terran.SCV)
            if len(scvs) > 0:
                scv = random.choice(scvs)
                return actions.FUNCTIONS.select_point("select_all_type", (scv.x,
                                                                          scv.y))
        
        elif smart_action == ACTION_BUILD_SUPPLY_DEPOT:
            if self.can_do(obs, actions.FUNCTIONS.Build_SupplyDepot_screen.id):
                ccs = self.get_units_by_type(obs, units.Terran.CommandCenter)
                if len(ccs) > 0:
                    cc = random.choice(ccs)
                    target = self.transformLocation(cc.x, 0, cc.y, 20)

                    return actions.FUNCTIONS.Build_SupplyDepot_screen("now", target)        
        
        elif smart_action == ACTION_BUILD_BARRACKS:
            if self.can_do(obs, actions.FUNCTIONS.Build_Barracks_screen.id):
                ccs = self.get_units_by_type(obs, units.Terran.CommandCenter)
                if len(ccs) > 0:
                    cc = random.choice(ccs)
                    target = self.transformLocation(cc.x, 20, cc.y, 0)

                    return actions.FUNCTIONS.Build_Barracks_screen("now", target)
    
        elif smart_action == ACTION_SELECT_BARRACKS:
            barracks = self.get_units_by_type(obs, units.Terran.Barracks)
            if len(barracks) > 0:
                barrack = random.choice(barracks)
                return actions.FUNCTIONS.select_point("select_all_type", (barrack.x,
                                                                          barrack.y))
        
        elif smart_action == ACTION_BUILD_MARINE:
            if self.can_do(obs, actions.FUNCTIONS.Train_Marine_quick.id):
                return actions.FUNCTIONS.Train_Marine_quick("queued")
        
        elif smart_action == ACTION_SELECT_ARMY:
            if self.can_do(obs, actions.FUNCTIONS.select_army.id):
                return actions.FUNCTIONS.select_army("select")
        
        elif smart_action == ACTION_ATTACK:
            if self.can_do(obs, actions.FUNCTIONS.Attack_minimap.id):
                if self.base_top_left:
                    return actions.FUNCTIONS.Attack_minimap("now", [39, 45])
                else:
                    return actions.FUNCTIONS.Attack_minimap("now", [21, 24])
            
        return actions.FUNCTIONS.no_op()

### [run code]

In [7]:
if __name__ == "__main__":
  app.run(main)

I0624 21:30:41.547713 4676054464 sc_process.py:135] Launching SC2: /Applications/StarCraft II/Versions/Base80188/SC2.app/Contents/MacOS/SC2 -listen 127.0.0.1 -port 19557 -dataDir /Applications/StarCraft II/ -tempDir /var/folders/kl/h0d5qxj551x0d2y091w17l1h0000gn/T/sc-cegmm0e6/ -displayMode 0 -windowwidth 640 -windowheight 480 -windowx 50 -windowy 50
I0624 21:30:41.553560 4676054464 remote_controller.py:166] Connecting to: ws://127.0.0.1:19557/sc2api, attempt: 0, running: True
I0624 21:30:42.563843 4676054464 remote_controller.py:166] Connecting to: ws://127.0.0.1:19557/sc2api, attempt: 1, running: True
I0624 21:30:43.565986 4676054464 remote_controller.py:166] Connecting to: ws://127.0.0.1:19557/sc2api, attempt: 2, running: True
I0624 21:30:44.572957 4676054464 remote_controller.py:166] Connecting to: ws://127.0.0.1:19557/sc2api, attempt: 3, running: True
I0624 21:30:45.574810 4676054464 remote_controller.py:166] Connecting to: ws://127.0.0.1:19557/sc2api, attempt: 4, running: True
I06

   0/no_op                                              ()
   1/move_camera                                        (1/minimap [64, 64])
   2/select_point                                       (6/select_point_act [4]; 0/screen [84, 84])
   3/select_rect                                        (7/select_add [2]; 0/screen [84, 84]; 2/screen2 [84, 84])
   4/select_control_group                               (4/control_group_act [5]; 5/control_group_id [10])
   5/select_unit                                        (8/select_unit_act [4]; 9/select_unit_id [500])
 264/Harvest_Gather_screen                              (3/queued [2]; 0/screen [84, 84])
  12/Attack_screen                                      (3/queued [2]; 0/screen [84, 84])
  13/Attack_minimap                                     (3/queued [2]; 1/minimap [64, 64])
 274/HoldPosition_quick                                 (3/queued [2])
 549/Effect_Spray_minimap                               (3/queued [2]; 1/minimap [64, 64])
 451/S

I0624 21:31:37.166640 4676054464 sc2_env.py:722] Episode 1 finished after 10144 game steps. Outcome: [-1], reward: [-1], score: [55]
I0624 21:31:41.143948 4676054464 sc2_env.py:506] Starting episode 2: [terran, random] on Simple64
I0624 21:32:21.446394 4676054464 sc2_env.py:722] Episode 2 finished after 15304 game steps. Outcome: [-1], reward: [-1], score: [50]
I0624 21:32:25.305299 4676054464 sc2_env.py:506] Starting episode 3: [terran, random] on Simple64
I0624 21:32:27.339617 4676054464 sc2_env.py:722] Episode 3 finished after 872 game steps. Outcome: [-1], reward: [-1], score: [650]
I0624 21:32:31.087931 4676054464 sc2_env.py:506] Starting episode 4: [terran, random] on Simple64


  79/Build_Refinery_screen                              (3/queued [2]; 0/screen [84, 84])
  91/Build_SupplyDepot_screen                           (3/queued [2]; 0/screen [84, 84])
 261/Halt_quick                                         (3/queued [2])
   6/select_idle_worker                                 (10/select_worker [4])


I0624 21:32:55.722808 4676054464 sc2_env.py:722] Episode 4 finished after 10744 game steps. Outcome: [-1], reward: [-1], score: [65]
I0624 21:32:59.630363 4676054464 sc2_env.py:506] Starting episode 5: [terran, random] on Simple64
I0624 21:33:22.028886 4676054464 sc2_env.py:722] Episode 5 finished after 10344 game steps. Outcome: [-1], reward: [-1], score: [75]
I0624 21:33:25.848289 4676054464 sc2_env.py:506] Starting episode 6: [terran, random] on Simple64
I0624 21:33:28.047236 4676054464 sc2_env.py:722] Episode 6 finished after 896 game steps. Outcome: [-1], reward: [-1], score: [655]
I0624 21:33:31.785370 4676054464 sc2_env.py:506] Starting episode 7: [terran, random] on Simple64
I0624 21:33:33.864907 4676054464 sc2_env.py:722] Episode 7 finished after 904 game steps. Outcome: [-1], reward: [-1], score: [655]
I0624 21:33:37.652884 4676054464 sc2_env.py:506] Starting episode 8: [terran, random] on Simple64
I0624 21:33:39.704809 4676054464 sc2_env.py:722] Episode 8 finished after 912 

  50/Build_EngineeringBay_screen                        (3/queued [2]; 0/screen [84, 84])
  44/Build_CommandCenter_screen                         (3/queued [2]; 0/screen [84, 84])
  42/Build_Barracks_screen                              (3/queued [2]; 0/screen [84, 84])
  43/Build_Bunker_screen                                (3/queued [2]; 0/screen [84, 84])


I0624 21:35:32.616453 4676054464 sc2_env.py:722] Episode 13 finished after 13120 game steps. Outcome: [0], reward: [0], score: [990]
I0624 21:35:37.131121 4676054464 sc2_env.py:506] Starting episode 14: [terran, random] on Simple64
I0624 21:36:08.624001 4676054464 sc2_env.py:722] Episode 14 finished after 10584 game steps. Outcome: [-1], reward: [-1], score: [10]
I0624 21:36:12.951419 4676054464 sc2_env.py:506] Starting episode 15: [terran, random] on Simple64
I0624 21:36:39.614391 4676054464 sc2_env.py:722] Episode 15 finished after 9384 game steps. Outcome: [-1], reward: [-1], score: [155]
I0624 21:36:43.706478 4676054464 sc2_env.py:506] Starting episode 16: [terran, random] on Simple64
I0624 21:37:32.358588 4676054464 sc2_env.py:722] Episode 16 finished after 11240 game steps. Outcome: [-1], reward: [-1], score: [425]
I0624 21:37:38.659747 4676054464 sc2_env.py:506] Starting episode 17: [terran, random] on Simple64
I0624 21:39:01.744621 4676054464 sc2_env.py:722] Episode 17 finished

 140/Cancel_quick                                       (3/queued [2])
 335/Rally_Units_screen                                 (3/queued [2]; 0/screen [84, 84])
 336/Rally_Units_minimap                                (3/queued [2]; 1/minimap [64, 64])
 281/Lift_quick                                         (3/queued [2])
 477/Train_Marine_quick                                 (3/queued [2])
 168/Cancel_Last_quick                                  (3/queued [2])
  11/build_queue                                        (11/build_queue_id [10])
   7/select_army                                        (7/select_add [2])


I0624 21:47:10.053254 4676054464 sc2_env.py:722] Episode 29 finished after 14936 game steps. Outcome: [-1], reward: [-1], score: [40]
I0624 21:47:14.146405 4676054464 sc2_env.py:506] Starting episode 30: [terran, random] on Simple64
I0624 21:47:35.512274 4676054464 sc2_env.py:722] Episode 30 finished after 8328 game steps. Outcome: [-1], reward: [-1], score: [70]
I0624 21:47:39.515069 4676054464 sc2_env.py:506] Starting episode 31: [terran, random] on Simple64


 318/Morph_SupplyDepot_Lower_quick                      (3/queued [2])


I0624 21:48:03.350636 4676054464 sc2_env.py:722] Episode 31 finished after 8800 game steps. Outcome: [-1], reward: [-1], score: [380]
I0624 21:48:07.409599 4676054464 sc2_env.py:506] Starting episode 32: [terran, random] on Simple64
I0624 21:48:38.550405 4676054464 sc2_env.py:722] Episode 32 finished after 10984 game steps. Outcome: [-1], reward: [-1], score: [485]
I0624 21:48:42.614320 4676054464 sc2_env.py:506] Starting episode 33: [terran, random] on Simple64
I0624 21:49:09.352229 4676054464 sc2_env.py:722] Episode 33 finished after 10136 game steps. Outcome: [-1], reward: [-1], score: [70]
I0624 21:49:13.390722 4676054464 sc2_env.py:506] Starting episode 34: [terran, random] on Simple64
I0624 21:49:40.311246 4676054464 sc2_env.py:722] Episode 34 finished after 9376 game steps. Outcome: [-1], reward: [-1], score: [735]
I0624 21:49:44.717716 4676054464 sc2_env.py:506] Starting episode 35: [terran, random] on Simple64
I0624 21:50:18.314795 4676054464 sc2_env.py:722] Episode 35 finishe

I0624 22:10:29.835561 4676054464 sc2_env.py:506] Starting episode 67: [terran, random] on Simple64
I0624 22:11:10.000726 4676054464 sc2_env.py:722] Episode 67 finished after 10912 game steps. Outcome: [-1], reward: [-1], score: [840]
I0624 22:11:14.545356 4676054464 sc2_env.py:506] Starting episode 68: [terran, random] on Simple64
I0624 22:11:41.382596 4676054464 sc2_env.py:722] Episode 68 finished after 8544 game steps. Outcome: [-1], reward: [-1], score: [655]
I0624 22:11:45.604297 4676054464 sc2_env.py:506] Starting episode 69: [terran, random] on Simple64
I0624 22:12:33.075769 4676054464 sc2_env.py:722] Episode 69 finished after 11072 game steps. Outcome: [-1], reward: [-1], score: [110]
I0624 22:12:38.145781 4676054464 sc2_env.py:506] Starting episode 70: [terran, random] on Simple64
I0624 22:14:02.277989 4676054464 sc2_env.py:722] Episode 70 finished after 16400 game steps. Outcome: [-1], reward: [-1], score: [20]
I0624 22:14:07.638508 4676054464 sc2_env.py:506] Starting episode 

I0624 22:33:28.426715 4676054464 sc2_env.py:722] Episode 102 finished after 9520 game steps. Outcome: [-1], reward: [-1], score: [205]
I0624 22:33:32.492629 4676054464 sc2_env.py:506] Starting episode 103: [terran, random] on Simple64
I0624 22:33:56.457993 4676054464 sc2_env.py:722] Episode 103 finished after 8784 game steps. Outcome: [-1], reward: [-1], score: [300]
I0624 22:34:00.612591 4676054464 sc2_env.py:506] Starting episode 104: [terran, random] on Simple64
I0624 22:34:26.566098 4676054464 sc2_env.py:722] Episode 104 finished after 8792 game steps. Outcome: [-1], reward: [-1], score: [625]
I0624 22:34:30.830204 4676054464 sc2_env.py:506] Starting episode 105: [terran, random] on Simple64
I0624 22:35:01.717993 4676054464 sc2_env.py:722] Episode 105 finished after 10504 game steps. Outcome: [-1], reward: [-1], score: [210]
I0624 22:35:06.014381 4676054464 sc2_env.py:506] Starting episode 106: [terran, random] on Simple64
I0624 22:35:33.247213 4676054464 sc2_env.py:722] Episode 10

I0624 23:10:53.040408 4676054464 sc2_env.py:722] Episode 137 finished after 11568 game steps. Outcome: [-1], reward: [-1], score: [50]
I0624 23:10:57.481811 4676054464 sc2_env.py:506] Starting episode 138: [terran, random] on Simple64
I0624 23:12:33.770622 4676054464 sc2_env.py:722] Episode 138 finished after 11552 game steps. Outcome: [-1], reward: [-1], score: [50]
I0624 23:12:38.483048 4676054464 sc2_env.py:506] Starting episode 139: [terran, random] on Simple64
I0624 23:13:58.510668 4676054464 sc2_env.py:722] Episode 139 finished after 9600 game steps. Outcome: [-1], reward: [-1], score: [120]
I0624 23:14:03.213701 4676054464 sc2_env.py:506] Starting episode 140: [terran, random] on Simple64
I0624 23:14:10.846380 4676054464 sc2_env.py:722] Episode 140 finished after 912 game steps. Outcome: [-1], reward: [-1], score: [665]
I0624 23:14:15.553574 4676054464 sc2_env.py:506] Starting episode 141: [terran, random] on Simple64
I0624 23:14:22.512516 4676054464 sc2_env.py:722] Episode 141 

I0624 23:43:12.689558 4676054464 sc2_env.py:722] Episode 172 finished after 10504 game steps. Outcome: [-1], reward: [-1], score: [55]
I0624 23:43:16.872156 4676054464 sc2_env.py:506] Starting episode 173: [terran, random] on Simple64
I0624 23:43:46.936976 4676054464 sc2_env.py:722] Episode 173 finished after 9896 game steps. Outcome: [-1], reward: [-1], score: [245]
I0624 23:43:52.986819 4676054464 sc2_env.py:506] Starting episode 174: [terran, random] on Simple64
I0624 23:45:29.205222 4676054464 sc2_env.py:722] Episode 174 finished after 11528 game steps. Outcome: [-1], reward: [-1], score: [80]
I0624 23:45:33.643907 4676054464 sc2_env.py:506] Starting episode 175: [terran, random] on Simple64
I0624 23:47:17.670589 4676054464 sc2_env.py:722] Episode 175 finished after 12424 game steps. Outcome: [-1], reward: [-1], score: [10]
I0624 23:47:22.379911 4676054464 sc2_env.py:506] Starting episode 176: [terran, random] on Simple64
I0624 23:47:29.537445 4676054464 sc2_env.py:722] Episode 176

Took 8821.295 seconds for 236432 steps: 26.802 fps


I0624 23:58:18.685752 4676054464 sc_process.py:232] Shutdown gracefully.
I0624 23:58:18.686533 4676054464 sc_process.py:210] Shutdown with return code: -15


ProtocolError: `save_replay` called while in state: Status.init_game, valid: (Status.in_game,Status.in_replay,Status.ended)