# StarCraft II: testing the first agent

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import torch
from RelationalModule import CoordActorCritic
from importlib import reload

In [3]:
from pysc2.env import sc2_env

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


In [4]:
def init_game(interface_dict, max_steps_per_episode=1000, **kwargs):
    """Create a single-player SC2 environment on the MoveToBeacon minigame.

    Parameters
    ----------
    interface_dict : dict
        Keyword arguments forwarded to ``sc2_env.parse_agent_interface_format``
        to build the agent interface format.
    max_steps_per_episode : int, optional
        Passed to ``SC2Env`` as ``game_steps_per_episode``.
    **kwargs
        Extra keyword arguments forwarded unchanged to ``sc2_env.SC2Env``.

    Returns
    -------
    env
        The object returned by ``sc2_env.SC2Env(...)``.
    """
    race = sc2_env.Race(1)  # 1 = terran
    agent = sc2_env.Agent(race, "Testv0")  # NamedTuple [race, agent_name]

    # The parser must be taken from the `sc2_env` module. Calling it on `env`
    # fails with UnboundLocalError, because `env` is a local name that is only
    # bound at the end of this function.
    agent_interface_format = sc2_env.parse_agent_interface_format(**interface_dict)

    game_params = dict(
        map_name='MoveToBeacon',  # simplest minigame
        players=[agent],  # use a list even for single player
        game_steps_per_episode=max_steps_per_episode,
        agent_interface_format=[agent_interface_format],  # use a list even for single player
    )
    env = sc2_env.SC2Env(**game_params, **kwargs)

    return env

In [6]:
def get_state(obs):
    """Build a 6-element state vector from the first timestep in ``obs``.

    Reads ``obs[0].observation['feature_screen']`` and uses the layers indexed
    by the module-level constants ``_PLAYER_RELATIVE`` and ``_SELECTED``.

    Returns
    -------
    numpy.ndarray
        ``[player_x, player_y, beacon_x, beacon_y, beacon_exists, is_selected]``
        where positions are the mean column/row of the matching pixels and the
        last two entries are 0.0 / 1.0 flags. A position is ``[-1., -1.]`` when
        no pixel matches.
    """
    feature_screen = obs[0].observation['feature_screen']
    player_relative = feature_screen[_PLAYER_RELATIVE]

    player_ys, player_xs = (player_relative == _PLAYER_FRIENDLY).nonzero()
    if player_ys.size > 0:
        player_pos = [player_xs.mean(), player_ys.mean()]
    else:
        # Same sentinel as the beacon: avoids NaN (mean of an empty slice)
        # leaking into the state vector.
        player_pos = [-1., -1.]

    beacon_ys, beacon_xs = (player_relative == _PLAYER_NEUTRAL).nonzero()
    # Test the number of matches, not the truthiness of the row indices:
    # `beacon_ys.any()` is False when every matching pixel lies on row 0.
    beacon_found = beacon_ys.size > 0
    if beacon_found:
        beacon_pos = [beacon_xs.mean(), beacon_ys.mean()]
    else:
        beacon_pos = [-1., -1.]
    beacon_exists = float(beacon_found)

    selected = feature_screen[_SELECTED]
    # Check the mask itself; checking the row indices returned by nonzero()
    # misses selections that lie entirely on row 0.
    is_selected = float(np.any(selected == 1))

    state = np.concatenate([player_pos, beacon_pos, [beacon_exists, is_selected]])

    return state

In [None]:
def play_episode(agent, env, max_steps, debug=False):
    """Roll out one episode and collect the trajectory.

    Parameters
    ----------
    agent
        Object exposing ``step(obs, return_log=True)`` that returns
        ``(action, log_prob, distribution)``.
    env
        Environment exposing ``reset()`` and ``step(action)``; ``step`` must
        return ``(new_state, reward, terminal, info)``.
    max_steps : int
        Step count used to decide whether the final transition needs
        bootstrapping (see the note inside the loop).
    debug : bool, optional
        When true, print the shapes of the collected states.

    Returns
    -------
    tuple
        ``(rewards, log_probs, distributions, states, done, bootstrap)``.
        ``rewards``, ``states``, ``done`` and ``bootstrap`` are numpy arrays;
        ``log_probs`` and ``distributions`` are plain lists. ``states`` holds
        one more entry than the other sequences (the initial state).
    """
    # Start the episode
    obs = env.reset()

    rewards = []
    log_probs = []
    distributions = []
    states = [get_state(obs)]
    done = []
    bootstrap = []

    steps = 0
    while True:
        # NOTE(review): `obs` is never refreshed inside this loop, so the agent
        # is queried with the reset observation at every step. Confirm what the
        # agent should receive after the first step.
        action, log_prob, distrib = agent.step(obs, return_log=True)
        # NOTE(review): this unpacking expects a gym-style 4-tuple. pysc2's
        # SC2Env.step is documented to take a list of actions and return one
        # TimeStep per agent, so confirm which environment wrapper is intended.
        new_state, reward, terminal, info = env.step(action)
        if debug: print("state.shape: ", new_state.shape)
        rewards.append(reward)
        log_probs.append(log_prob)
        distributions.append(distrib)
        states.append(new_state)
        done.append(terminal)

        # Use truthiness rather than `is True`: a numpy bool is never
        # identical to the `True` singleton, which would keep the loop running
        # past the end of the episode.
        finished = bool(terminal)

        # Still unclear how to retrieve max steps from the game itself
        # NOTE(review): `steps` counts the transitions completed *before* this
        # one, so this flag is set on the (max_steps + 1)-th call to env.step.
        # Confirm that this matches the environment's time limit.
        bootstrap.append(finished and steps == max_steps)

        if finished:
            break

        steps += 1

    rewards = np.array(rewards)
    states = np.array(states)
    if debug: print("states.shape: ", states.shape)
    done = np.array(done)
    bootstrap = np.array(bootstrap)

    return rewards, log_probs, distributions, states, done, bootstrap

In [5]:
# Replay-saving options, grouped so they can be unpacked as keyword arguments.
# NOTE(review): presumably forwarded to SC2Env through init_game(**kwargs) — confirm at the call site.
replay_dict = {
    'save_replay_episodes': 100,
    'replay_dir': 'Replays/',
    'replay_prefix': 'Agent1',
}