# STEP 4 - Making DRL PySC2 Agent

In [None]:
%load_ext autoreload
%autoreload 2

## 0. Running 'Agent code' in a Jupyter notebook

In [None]:
# Unfortunately, PySC2 uses Abseil (absl), which treats Python code as if it
# is run as a command-line app.
# This does not play well with Jupyter notebooks,
# so we monkeypatch sys.argv to supply the command-line flags ourselves.


import sys
# Alternative map, kept for reference:
#sys.argv = ["python", "--map", "AbyssalReef"]
sys.argv = ["python", "--map", "Simple64"]

# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Run an agent."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import importlib
import threading

from absl import app
from absl import flags
from future.builtins import range  # pylint: disable=redefined-builtin

from pysc2 import maps
from pysc2.env import available_actions_printer
from pysc2.env import run_loop
from pysc2.env import sc2_env
from pysc2.lib import point_flag
from pysc2.lib import stopwatch
from pysc2.lib import actions

FLAGS = flags.FLAGS

# Notebook cells may be executed more than once, but absl does not allow the
# same flag to be defined twice. The module-level `flags_defined` marker (set
# immediately after this block) records that the definitions already ran, so
# re-running the cell skips them.
if "flags_defined" not in globals():
    # --- Observation / interface settings ---
    flags.DEFINE_bool("render", True, "Whether to render with pygame.")
    point_flag.DEFINE_point("feature_screen_size", "84",
                            "Resolution for screen feature layers.")
    point_flag.DEFINE_point("feature_minimap_size", "64",
                            "Resolution for minimap feature layers.")
    point_flag.DEFINE_point("rgb_screen_size", None,
                            "Resolution for rendered screen.")
    point_flag.DEFINE_point("rgb_minimap_size", None,
                            "Resolution for rendered minimap.")
    flags.DEFINE_enum("action_space", "RAW", sc2_env.ActionSpace._member_names_,  # pylint: disable=protected-access
                      "Which action space to use. Needed if you take both feature "
                      "and rgb observations.")
    flags.DEFINE_bool("use_feature_units", False,
                      "Whether to include feature units.")
    flags.DEFINE_bool("use_raw_units", True,
                      "Whether to include raw units.")
    flags.DEFINE_integer("raw_resolution", 64, "Raw Resolution.")
    flags.DEFINE_bool("disable_fog", True, "Whether to disable Fog of War.")

    # --- Run length / speed settings ---
    flags.DEFINE_integer("max_agent_steps", 0, "Total agent steps.")
    flags.DEFINE_integer("game_steps_per_episode", None, "Game steps per episode.")
    flags.DEFINE_integer("max_episodes", 0, "Total episodes.")
    flags.DEFINE_integer("step_mul", 8, "Game steps per agent step.")
    flags.DEFINE_float("fps", 22.4, "Frames per second to run the game.")

    # --- Player settings ---
    # Previous (Zerg) defaults, kept for reference:
    #flags.DEFINE_string("agent", "sc2.agent.BasicAgent.ZergBasicAgent",
    #                    "Which agent to run, as a python path to an Agent class.")
    #flags.DEFINE_enum("agent_race", "zerg", sc2_env.Race._member_names_,  # pylint: disable=protected-access
    #                  "Agent 1's race.")
    # NOTE: main() in this file hard-codes the agent class and does not read
    # the "agent" flag; only "agent_race" is consulted.
    flags.DEFINE_string("agent", "TerranRLAgentWithRawActsAndRawObs",
                        "Which agent to run, as a python path to an Agent class.")
    flags.DEFINE_enum("agent_race", "terran", sc2_env.Race._member_names_,  # pylint: disable=protected-access
                      "Agent 1's race.")

    flags.DEFINE_string("agent2", "Bot", "Second agent, either Bot or agent class.")
    flags.DEFINE_enum("agent2_race", "terran", sc2_env.Race._member_names_,  # pylint: disable=protected-access
                      "Agent 2's race.")
    flags.DEFINE_enum("difficulty", "very_easy", sc2_env.Difficulty._member_names_,  # pylint: disable=protected-access
                      "If agent2 is a built-in Bot, it's strength.")

    # --- Diagnostics / parallelism ---
    flags.DEFINE_bool("profile", False, "Whether to turn on code profiling.")
    flags.DEFINE_bool("trace", False, "Whether to trace the code execution.")
    flags.DEFINE_integer("parallel", 1, "How many instances to run in parallel.")

    flags.DEFINE_bool("save_replay", True, "Whether to save a replay at the end.")

    # The map has no default and must be supplied (here via the patched sys.argv).
    flags.DEFINE_string("map", None, "Name of a map to use.")
    flags.mark_flag_as_required("map")

# Marker consulted by the guard above on subsequent executions of this cell.
flags_defined = True

def run_thread(agent_classes, players, map_name, visualize):
  """Run one thread worth of the environment with agents.

  Creates an SC2 environment configured from FLAGS, instantiates one agent per
  entry in `agent_classes`, runs the standard PySC2 run loop, and optionally
  saves a replay named after the first agent class.

  Args:
    agent_classes: Sequence of agent classes; each is instantiated with no
      arguments.
    players: Player specifications passed straight to `sc2_env.SC2Env`.
    map_name: Name of the map to load.
    visualize: Whether the environment should render (passed to `SC2Env`).
  """
  with sc2_env.SC2Env(
      map_name=map_name,
      players=players,
      agent_interface_format=sc2_env.parse_agent_interface_format(
        feature_screen=FLAGS.feature_screen_size,
        feature_minimap=FLAGS.feature_minimap_size,
        rgb_screen=FLAGS.rgb_screen_size,
        rgb_minimap=FLAGS.rgb_minimap_size,
        action_space=FLAGS.action_space,
        # The flag is defined alongside the others but was previously never
        # forwarded, so setting --use_feature_units had no effect.
        use_feature_units=FLAGS.use_feature_units,
        use_raw_units=FLAGS.use_raw_units,
        raw_resolution=FLAGS.raw_resolution),
      step_mul=FLAGS.step_mul,
      game_steps_per_episode=FLAGS.game_steps_per_episode,
      disable_fog=FLAGS.disable_fog,
      visualize=visualize) as env:
    # Uncomment to print newly available actions while debugging:
    #env = available_actions_printer.AvailableActionsPrinter(env)
    agents = [agent_cls() for agent_cls in agent_classes]
    run_loop.run_loop(agents, env, FLAGS.max_agent_steps, FLAGS.max_episodes)
    if FLAGS.save_replay:
      env.save_replay(agent_classes[0].__name__)

def main(unused_argv):
  """Set up the players from FLAGS and run the game, possibly in parallel.

  Player 1 is always `TerranRLAgentWithRawActsAndRawObs` (the "agent" flag is
  not consulted). On maps with two or more players, player 2 is either the
  built-in bot (when --agent2 is "Bot") or a `TerranRandomAgent`.

  Args:
    unused_argv: Positional command-line arguments from absl; ignored.
  """
  # Profiling hooks are disabled in this notebook version:
  #stopwatch.sw.enabled = FLAGS.profile or FLAGS.trace
  #stopwatch.sw.trace = FLAGS.trace

  map_inst = maps.get(FLAGS.map)

  # Dynamic lookup via importlib is disabled; the agent class lives in this
  # notebook, so it is referenced directly.
  agent_classes = [TerranRLAgentWithRawActsAndRawObs]
  players = [sc2_env.Agent(sc2_env.Race[FLAGS.agent_race])]

  if map_inst.players >= 2:
    opponent_race = sc2_env.Race[FLAGS.agent2_race]
    if FLAGS.agent2 != "Bot":
      # Any non-"Bot" value selects the scripted random agent as opponent.
      agent_classes.append(TerranRandomAgent)
      players.append(sc2_env.Agent(opponent_race))
    else:
      bot_level = sc2_env.Difficulty[FLAGS.difficulty]
      players.append(sc2_env.Bot(opponent_race, bot_level))

  # Extra instances run headless in background threads; the last instance
  # runs in the calling thread and honours --render.
  workers = [
      threading.Thread(target=run_thread,
                       args=(agent_classes, players, FLAGS.map, False))
      for _ in range(FLAGS.parallel - 1)
  ]
  for worker in workers:
    worker.start()

  run_thread(agent_classes, players, FLAGS.map, FLAGS.render)

  for worker in workers:
    worker.join()

  if FLAGS.profile:
    # Stopwatch reporting is disabled together with the hooks above.
    #print(stopwatch.sw)
    pass

## 1. Creating a PySC2 Agent with Raw Actions & Observations

In [None]:
import random
import time
import math
import os.path

import numpy as np
import pandas as pd


from pysc2.agents import base_agent
from pysc2.env import sc2_env
from pysc2.lib import actions, features, units
from absl import app

In [31]:
# reference from https://github.com/MorvanZhou/Reinforcement-learning-with-tensorflow
class QLearningTable:
    """Tabular Q-learning backed by a pandas DataFrame.

    Rows of `q_table` are state labels and columns are actions. States are
    added lazily, initialised to zero, the first time they are seen.

    Args:
        actions: Sequence of action labels; used as the table's columns.
        learning_rate: Step size applied to the temporal-difference error.
        reward_decay: Discount factor applied to the next state's best value.
    """

    def __init__(self, actions, learning_rate=0.01, reward_decay=0.9):
        self.actions = actions
        self.learning_rate = learning_rate
        self.reward_decay = reward_decay
        self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)

    def choose_action(self, observation, e_greedy=0.9):
        """Pick an action for `observation` using an epsilon-greedy policy.

        With probability `e_greedy` the action with the highest Q-value is
        returned (ties are broken uniformly at random); otherwise a uniformly
        random action is returned.
        """
        self.check_state_exist(observation)
        if np.random.uniform() < e_greedy:
            state_action = self.q_table.loc[observation, :]
            # Choose randomly among all actions sharing the maximum value so
            # that the first column is not favoured while values are equal.
            action = np.random.choice(
                state_action[state_action == np.max(state_action)].index)
        else:
            action = np.random.choice(self.actions)
        return action

    def learn(self, s, a, r, s_):
        """Apply one Q-learning update for the transition (s, a, r, s_).

        `s` must already be present in the table (it is whenever it was
        previously passed to `choose_action`). Passing the string 'terminal'
        as `s_` marks the end of an episode: the target is then just `r`.
        """
        # Note: this also registers 'terminal' as a (never updated) row.
        self.check_state_exist(s_)
        q_predict = self.q_table.loc[s, a]
        if s_ != 'terminal':
            q_target = r + self.reward_decay * self.q_table.loc[s_, :].max()
        else:
            q_target = r

        self.q_table.loc[s, a] += self.learning_rate * (q_target - q_predict)

    def check_state_exist(self, state):
        """Add an all-zero row for `state` if the table does not have one."""
        if state not in self.q_table.index:
            # DataFrame.append was removed in pandas 2.0; assigning through
            # .loc with a new label enlarges the frame in place instead.
            self.q_table.loc[state] = [0.0] * len(self.actions)

In [32]:
class TerranAgentWithRawActsAndRawObs(base_agent.BaseAgent):
    """Scripted Terran building blocks using raw observations and raw actions.

    Each name listed in `actions` is also the name of a method taking the
    current observation and returning a raw action (or a no-op when the
    action's preconditions are not met). Subclasses decide which one to call
    on each step.
    """

    # Names of the high-level actions; each matches a method on this class.
    actions = ("do_nothing",
               "harvest_minerals",
               "build_supply_depot",
               "build_barracks",
               "train_marine",
               "attack")

    def get_my_units_by_type(self, obs, unit_type):
        """Return own raw units of `unit_type`, regardless of build progress."""
        return [unit for unit in obs.observation.raw_units
                if unit.unit_type == unit_type
                and unit.alliance == features.PlayerRelative.SELF]

    def get_enemy_units_by_type(self, obs, unit_type):
        """Return enemy raw units of `unit_type`, regardless of build progress."""
        return [unit for unit in obs.observation.raw_units
                if unit.unit_type == unit_type
                and unit.alliance == features.PlayerRelative.ENEMY]

    def get_my_completed_units_by_type(self, obs, unit_type):
        """Return own raw units of `unit_type` whose build_progress is 100."""
        return [unit for unit in obs.observation.raw_units
                if unit.unit_type == unit_type
                and unit.build_progress == 100
                and unit.alliance == features.PlayerRelative.SELF]

    def get_enemy_completed_units_by_type(self, obs, unit_type):
        """Return enemy raw units of `unit_type` whose build_progress is 100."""
        return [unit for unit in obs.observation.raw_units
                if unit.unit_type == unit_type
                and unit.build_progress == 100
                and unit.alliance == features.PlayerRelative.ENEMY]

    def get_distances(self, obs, units, xy):
        """Return the Euclidean distance from each unit in `units` to `xy`.

        Returns an empty array when `units` is empty.
        """
        units_xy = [(unit.x, unit.y) for unit in units]
        # reshape keeps the (n, 2) layout even for n == 0, where a plain
        # np.array([]) would have shape (0,) and fail to broadcast against xy.
        return np.linalg.norm(
            np.array(units_xy).reshape(-1, 2) - np.array(xy), axis=1)

    def step(self, obs):
        """Run the base-class step and, on the first observation, record
        which side of the map the starting command center is on."""
        super(TerranAgentWithRawActsAndRawObs, self).step(obs)
        if obs.first():
            command_center = self.get_my_units_by_type(
                obs, units.Terran.CommandCenter)[0]
            # x < 32 is treated as "top-left start"; the hard-coded build and
            # attack coordinates below are chosen from this flag.
            self.base_top_left = (command_center.x < 32)

    def do_nothing(self, obs):
        """Return a raw no-op action."""
        return actions.RAW_FUNCTIONS.no_op()

    def harvest_minerals(self, obs):
        """Send one random idle SCV to the mineral patch nearest to it.

        Returns a no-op when there is no idle SCV or no mineral patch in the
        observation.
        """
        scvs = self.get_my_units_by_type(obs, units.Terran.SCV)
        idle_scvs = [scv for scv in scvs if scv.order_length == 0]
        if not idle_scvs:
            return actions.RAW_FUNCTIONS.no_op()

        mineral_patches = [unit for unit in obs.observation.raw_units
                           if unit.unit_type in [
                               units.Neutral.BattleStationMineralField,
                               units.Neutral.BattleStationMineralField750,
                               units.Neutral.LabMineralField,
                               units.Neutral.LabMineralField750,
                               units.Neutral.MineralField,
                               units.Neutral.MineralField750,
                               units.Neutral.PurifierMineralField,
                               units.Neutral.PurifierMineralField750,
                               units.Neutral.PurifierRichMineralField,
                               units.Neutral.PurifierRichMineralField750,
                               units.Neutral.RichMineralField,
                               units.Neutral.RichMineralField750
                           ]]
        if not mineral_patches:
            # Nothing to gather from; argmin over an empty array would raise.
            return actions.RAW_FUNCTIONS.no_op()

        scv = random.choice(idle_scvs)
        distances = self.get_distances(obs, mineral_patches, (scv.x, scv.y))
        mineral_patch = mineral_patches[np.argmin(distances)]
        return actions.RAW_FUNCTIONS.Harvest_Gather_unit(
            "now", scv.tag, mineral_patch.tag)

    def build_supply_depot(self, obs):
        """Build the first supply depot at a fixed spot using the nearest SCV.

        Only acts when no supply depot exists yet, at least 100 minerals are
        available and at least one SCV exists; otherwise returns a no-op.
        """
        supply_depots = self.get_my_units_by_type(obs, units.Terran.SupplyDepot)
        scvs = self.get_my_units_by_type(obs, units.Terran.SCV)
        if (len(supply_depots) == 0 and obs.observation.player.minerals >= 100 and
                len(scvs) > 0):
            supply_depot_xy = (22, 26) if self.base_top_left else (35, 42)
            distances = self.get_distances(obs, scvs, supply_depot_xy)
            scv = scvs[np.argmin(distances)]
            return actions.RAW_FUNCTIONS.Build_SupplyDepot_pt(
                "now", scv.tag, supply_depot_xy)
        return actions.RAW_FUNCTIONS.no_op()

    def build_barracks(self, obs):
        """Build the first barracks at a fixed spot using the nearest SCV.

        Only acts when a completed supply depot exists, no barracks exists
        yet, at least 150 minerals are available and at least one SCV exists;
        otherwise returns a no-op.
        """
        completed_supply_depots = self.get_my_completed_units_by_type(
            obs, units.Terran.SupplyDepot)
        barrackses = self.get_my_units_by_type(obs, units.Terran.Barracks)
        scvs = self.get_my_units_by_type(obs, units.Terran.SCV)
        if (len(completed_supply_depots) > 0 and len(barrackses) == 0 and
                obs.observation.player.minerals >= 150 and len(scvs) > 0):
            barracks_xy = (22, 21) if self.base_top_left else (35, 45)
            distances = self.get_distances(obs, scvs, barracks_xy)
            scv = scvs[np.argmin(distances)]
            return actions.RAW_FUNCTIONS.Build_Barracks_pt(
                "now", scv.tag, barracks_xy)
        return actions.RAW_FUNCTIONS.no_op()

    def train_marine(self, obs):
        """Queue a marine at the first completed barracks.

        Only acts when a completed barracks exists, at least 100 minerals and
        some free supply are available, and that barracks has fewer than five
        orders queued; otherwise returns a no-op.
        """
        completed_barrackses = self.get_my_completed_units_by_type(
            obs, units.Terran.Barracks)
        free_supply = (obs.observation.player.food_cap -
                       obs.observation.player.food_used)
        if (len(completed_barrackses) > 0 and obs.observation.player.minerals >= 100
                and free_supply > 0):
            # Use the barracks that passed the completion check rather than
            # the first barracks of any build state.
            barracks = completed_barrackses[0]
            if barracks.order_length < 5:
                return actions.RAW_FUNCTIONS.Train_Marine_quick("now", barracks.tag)
        return actions.RAW_FUNCTIONS.no_op()

    def attack(self, obs):
        """Order one marine to attack near the opposite start location.

        The marine currently farthest from the target point is selected, and
        the target is jittered by up to 4 in each axis. Returns a no-op when
        there are no marines.
        """
        marines = self.get_my_units_by_type(obs, units.Terran.Marine)
        if len(marines) > 0:
            attack_xy = (38, 44) if self.base_top_left else (19, 23)
            distances = self.get_distances(obs, marines, attack_xy)
            marine = marines[np.argmax(distances)]
            x_offset = random.randint(-4, 4)
            y_offset = random.randint(-4, 4)
            return actions.RAW_FUNCTIONS.Attack_pt(
                "now", marine.tag, (attack_xy[0] + x_offset, attack_xy[1] + y_offset))
        return actions.RAW_FUNCTIONS.no_op()

In [33]:
class TerranRandomAgent(TerranAgentWithRawActsAndRawObs):
    """Baseline agent that performs a uniformly random scripted action."""

    def step(self, obs):
        """Run the parent step, then dispatch to a randomly chosen action."""
        super(TerranRandomAgent, self).step(obs)
        chosen_name = random.choice(self.actions)
        # Every entry in `actions` names a method that accepts the observation.
        handler = getattr(self, chosen_name)
        return handler(obs)

In [34]:
class TerranRLAgentWithRawActsAndRawObs(TerranAgentWithRawActsAndRawObs):
    """Q-learning agent choosing among the parent's scripted actions.

    The state is a tuple of unit counts and affordability flags (see
    `get_state`), converted to a string and used as a row label of the
    Q-table. The table is loaded from, and saved to, a gzip-compressed pickle
    in the working directory so that learning carries over between runs.
    """

    def __init__(self):
        super(TerranRLAgentWithRawActsAndRawObs, self).__init__()
        self.qlearn = QLearningTable(self.actions)
        self.new_game()
        self.data_file = 'rlagent_with_raw_acts_and_obs_learning_data'
        if os.path.isfile(self.data_file + '.gz'):
            # NOTE: unpickling executes arbitrary code; only load files that
            # this agent wrote itself.
            self.qlearn.q_table = pd.read_pickle(self.data_file + '.gz', compression='gzip')

    def reset(self):
        """Reset the base agent and clear the per-episode state."""
        super(TerranRLAgentWithRawActsAndRawObs, self).reset()
        self.new_game()

    def new_game(self):
        """Forget the start location and the previous state/action pair."""
        self.base_top_left = None
        self.previous_state = None
        self.previous_action = None

    def get_state(self, obs):
        """Summarise the observation as a tuple of counts and flags.

        The tuple holds, for the agent: numbers of command centers, SCVs,
        idle SCVs, supply depots (all / completed), barracks (all /
        completed) and marines, the order queue length of the first completed
        barracks, free supply, and whether a supply depot, barracks and
        marine are affordable. It then holds the enemy's counts of the same
        unit categories.
        """
        scvs = self.get_my_units_by_type(obs, units.Terran.SCV)
        idle_scvs = [scv for scv in scvs if scv.order_length == 0]
        command_centers = self.get_my_units_by_type(obs, units.Terran.CommandCenter)
        supply_depots = self.get_my_units_by_type(obs, units.Terran.SupplyDepot)
        completed_supply_depots = self.get_my_completed_units_by_type(
            obs, units.Terran.SupplyDepot)
        barrackses = self.get_my_units_by_type(obs, units.Terran.Barracks)
        completed_barrackses = self.get_my_completed_units_by_type(
            obs, units.Terran.Barracks)
        marines = self.get_my_units_by_type(obs, units.Terran.Marine)

        # The order queue length of the first completed barracks is used as
        # the number of marines in production.
        queued_marines = (completed_barrackses[0].order_length
                          if len(completed_barrackses) > 0 else 0)

        free_supply = (obs.observation.player.food_cap -
                       obs.observation.player.food_used)
        can_afford_supply_depot = obs.observation.player.minerals >= 100
        can_afford_barracks = obs.observation.player.minerals >= 150
        can_afford_marine = obs.observation.player.minerals >= 100

        enemy_scvs = self.get_enemy_units_by_type(obs, units.Terran.SCV)
        enemy_idle_scvs = [scv for scv in enemy_scvs if scv.order_length == 0]
        enemy_command_centers = self.get_enemy_units_by_type(
            obs, units.Terran.CommandCenter)
        enemy_supply_depots = self.get_enemy_units_by_type(
            obs, units.Terran.SupplyDepot)
        enemy_completed_supply_depots = self.get_enemy_completed_units_by_type(
            obs, units.Terran.SupplyDepot)
        enemy_barrackses = self.get_enemy_units_by_type(obs, units.Terran.Barracks)
        enemy_completed_barrackses = self.get_enemy_completed_units_by_type(
            obs, units.Terran.Barracks)
        enemy_marines = self.get_enemy_units_by_type(obs, units.Terran.Marine)

        return (len(command_centers),
                len(scvs),
                len(idle_scvs),
                len(supply_depots),
                len(completed_supply_depots),
                len(barrackses),
                len(completed_barrackses),
                len(marines),
                queued_marines,
                free_supply,
                can_afford_supply_depot,
                can_afford_barracks,
                can_afford_marine,
                len(enemy_command_centers),
                len(enemy_scvs),
                len(enemy_idle_scvs),
                len(enemy_supply_depots),
                len(enemy_completed_supply_depots),
                len(enemy_barrackses),
                len(enemy_completed_barrackses),
                len(enemy_marines))

    def step(self, obs):
        """Choose an action, learn from the previous transition, and act.

        The Q-table is updated with the reward carried by `obs` for the
        previously taken action. On the last observation of an episode the
        transition is treated as terminal and the Q-table is written to disk.
        """
        super(TerranRLAgentWithRawActsAndRawObs, self).step(obs)

        # Uncomment to slow the game down for watching:
        #time.sleep(0.5)

        # The tuple is stringified so it can serve as a DataFrame row label.
        state = str(self.get_state(obs))
        action = self.qlearn.choose_action(state)
        if self.previous_action is not None:
            self.qlearn.learn(self.previous_state,
                              self.previous_action,
                              obs.reward,
                              'terminal' if obs.last() else state)
        self.previous_state = state
        self.previous_action = action

        if obs.last():
            # `compression` is passed by keyword: positional use is deprecated
            # in recent pandas releases.
            self.qlearn.q_table.to_pickle(self.data_file + '.gz', compression='gzip')

        return getattr(self, action)(obs)

### [run code]

In [35]:
# Entry point: absl's app.run parses the flags from sys.argv (patched near the
# top of this notebook) and then calls main.
if __name__ == "__main__":
  app.run(main)

I0630 15:56:32.409721 4591930816 sc_process.py:135] Launching SC2: /Applications/StarCraft II/Versions/Base80188/SC2.app/Contents/MacOS/SC2 -listen 127.0.0.1 -port 21710 -dataDir /Applications/StarCraft II/ -tempDir /var/folders/kl/h0d5qxj551x0d2y091w17l1h0000gn/T/sc-jviipohg/ -displayMode 0 -windowwidth 640 -windowheight 480 -windowx 50 -windowy 50
I0630 15:56:32.492918 4591930816 remote_controller.py:166] Connecting to: ws://127.0.0.1:21710/sc2api, attempt: 0, running: True
I0630 15:56:33.501374 4591930816 remote_controller.py:166] Connecting to: ws://127.0.0.1:21710/sc2api, attempt: 1, running: True
I0630 15:56:34.505419 4591930816 remote_controller.py:166] Connecting to: ws://127.0.0.1:21710/sc2api, attempt: 2, running: True
I0630 15:56:35.508789 4591930816 remote_controller.py:166] Connecting to: ws://127.0.0.1:21710/sc2api, attempt: 3, running: True
I0630 15:56:36.514149 4591930816 remote_controller.py:166] Connecting to: ws://127.0.0.1:21710/sc2api, attempt: 4, running: True
I06

obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


I0630 15:59:24.925704 4591930816 sc2_env.py:722] Episode 1 finished after 19328 game steps. Outcome: [-1], reward: [-1], score: [6430]


obs.reward :  -1
q_target : -1 , q_predict :  0.0


I0630 15:59:28.769848 4591930816 sc2_env.py:506] Starting episode 2: [terran, terran] on Simple64


obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0
obs.reward :  0
q_target : 0.0 , q_predict :  0.0


I0630 16:00:53.805925 4591930816 sc2_env.py:752] Environment Close
I0630 16:00:53.808187 4591930816 sc_process.py:232] Shutdown gracefully.
I0630 16:00:53.809101 4591930816 sc_process.py:210] Shutdown with return code: 1


ConnectionError: Error during save_replay: Socket error: [Errno 54] Connection reset by peer