# A2C derivate classes

In [1]:
import sys
sys.path.insert(0, "../")
from SC_Utils.game_utils import ObsProcesser, get_action_dict
from SC_Utils.train_v2 import *
from AC_modules.BatchedA2C import SpatialA2C, SpatialA2C_v1, SpatialA2C_v2, SpatialA2C_v3
import AC_modules.Networks as net
import torch

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
# Environment parameters
RESOLUTION = 32
MAX_STEPS = 256
game_params = dict(feature_screen=RESOLUTION, feature_minimap=RESOLUTION, action_space="FEATURES") 
game_names = {1:'MoveToBeacon',
              2:'CollectMineralShards',
              3:'DefeatRoaches',
              4:'FindAndDefeatZerglings',
              5:'DefeatZerglingsAndBanelings',
              6:'CollectMineralsAndGas',
              7:'BuildMarines'
              }
map_name = game_names[4]

# Observation Processer parameters
#screen_names = ['visibility_map', 'player_relative', 'selected', 'unit_density', 'unit_density_aa']
#minimap_names = []
#obs_proc_params = {'screen_names':screen_names, 'minimap_names':minimap_names}
obs_proc_params = {'select_all':True}

In [3]:
env = init_game(game_params, map_name)
op = ObsProcesser(**obs_proc_params)
screen_channels, minimap_channels = op.get_n_channels()
in_channels = screen_channels + minimap_channels 

In [4]:
#action_names = ['select_point', 'Move_screen']
#action_names = ['select_army','Move_screen']
action_names = ['select_army', 'Attack_screen', 'Move_screen', 'select_point', 'select_rect',
                'move_camera','Stop_quick','Move_minimap','Attack_minimap','HoldPosition_quick']
action_dict = get_action_dict(action_names)
action_space = len(action_dict)

In [5]:
spatial_model = net.FullyConvSpatial
nonspatial_model = net.FullyConvNonSpatial
embed_dim = 8
n_channels = 32
n_features = 256
spatial_dict = {"in_channels":in_channels}
nonspatial_dict = {'resolution':RESOLUTION, 'kernel_size':3, 'stride':2}

In [6]:
HPs = dict(action_space=action_space, gamma=0.99, n_steps=20, H=1e-3, 
           spatial_model=spatial_model, nonspatial_model=nonspatial_model,
           n_features=n_features, n_channels=n_channels, 
           spatial_dict=spatial_dict, nonspatial_dict=nonspatial_dict, 
           action_dict=action_dict)

if torch.cuda.is_available():
    HPs['device'] = 'cuda'
else:
    HPs['device'] = 'cpu'
    
print("Using device "+HPs['device'])

lr = 7e-4

Using device cuda


In [7]:
version = 2
if version == 1:
    HPs = {**HPs, 'embed_dim':embed_dim}
    agent = SpatialA2C_v1(env=env, **HPs)
elif version == 2:
    # no action embedding
    agent = SpatialA2C_v2(env=env, **HPs)
elif version == 3:
    agent = SpatialA2C_v3(env=env, **HPs)
else:
    raise Exception("Version not implemented.")


arg.name:  select_add
arg_name:  select_army/select_add
size:  (2,)
Init CategoricalNet for select_army/select_add argument

arg.name:  queued
arg_name:  Attack_screen/queued
size:  (2,)
Init CategoricalNet for Attack_screen/queued argument

arg.name:  screen
arg_name:  Attack_screen/screen
size:  (32, 32)
Init SpatialNet for Attack_screen/screen argument

arg.name:  queued
arg_name:  Move_screen/queued
size:  (2,)
Init CategoricalNet for Move_screen/queued argument

arg.name:  screen
arg_name:  Move_screen/screen
size:  (32, 32)
Init SpatialNet for Move_screen/screen argument

arg.name:  select_point_act
arg_name:  select_point/select_point_act
size:  (4,)
Init CategoricalNet for select_point/select_point_act argument

arg.name:  screen
arg_name:  select_point/screen
size:  (32, 32)
Init SpatialNet for select_point/screen argument

arg.name:  select_add
arg_name:  select_rect/select_add
size:  (2,)
Init CategoricalNet for select_rect/select_add argument

arg.name:  screen
arg_name:  

In [8]:
unroll_length = 60

train_dict = dict(n_train_processes = 11,
                  max_train_steps = unroll_length*20000,
                  unroll_length = unroll_length,
                  test_interval = unroll_length*20 #100
                  )

In [None]:
%%time
results = train_batched_A2C(agent, game_params, map_name, lr, 
                            obs_proc_params=obs_proc_params, action_dict=action_dict, **train_dict)

Process ID:  RAVT
Step # : 1200, avg score : 12.2
Step # : 2400, avg score : 12.0
Step # : 3600, avg score : 14.8
Step # : 4800, avg score : 16.0
Step # : 6000, avg score : 20.2
Step # : 7200, avg score : 16.0
Step # : 8400, avg score : 17.6
Step # : 9600, avg score : 18.6
Step # : 10800, avg score : 12.6
Step # : 12000, avg score : 19.0
Step # : 13200, avg score : 22.0
Step # : 14400, avg score : 23.4
Step # : 15600, avg score : 19.4
Step # : 16800, avg score : 24.6
Step # : 18000, avg score : 22.6
Step # : 19200, avg score : 21.4
Step # : 20400, avg score : 23.8
Step # : 21600, avg score : 20.0
Step # : 22800, avg score : 22.4
Step # : 24000, avg score : 21.6
Step # : 25200, avg score : 19.2
Step # : 26400, avg score : 18.8
Step # : 27600, avg score : 24.8
Step # : 28800, avg score : 22.6
Step # : 30000, avg score : 22.0
Step # : 31200, avg score : 21.4
Step # : 32400, avg score : 20.2
Step # : 33600, avg score : 21.4
Step # : 34800, avg score : 22.6
Step # : 36000, avg score : 23.0


Step # : 294000, avg score : 23.2
Step # : 295200, avg score : 22.4
Step # : 296400, avg score : 22.0
Step # : 297600, avg score : 21.2
Step # : 298800, avg score : 21.6
Step # : 300000, avg score : 23.0
Step # : 301200, avg score : 22.8
Step # : 302400, avg score : 21.0
Step # : 303600, avg score : 21.6
Step # : 304800, avg score : 22.2
Step # : 306000, avg score : 21.4
Step # : 307200, avg score : 21.4
Step # : 308400, avg score : 22.4
Step # : 309600, avg score : 21.2
Step # : 310800, avg score : 24.6
Step # : 312000, avg score : 21.6
Step # : 313200, avg score : 21.8
Step # : 314400, avg score : 21.6
Step # : 315600, avg score : 20.2
Step # : 316800, avg score : 23.0
Step # : 318000, avg score : 20.4
Step # : 319200, avg score : 22.8
Step # : 320400, avg score : 21.4
Step # : 321600, avg score : 22.0
Step # : 322800, avg score : 22.2
Step # : 324000, avg score : 21.2
Step # : 325200, avg score : 22.8
Step # : 326400, avg score : 19.6
Step # : 327600, avg score : 23.8
Step # : 32880

Step # : 583200, avg score : 22.6
Step # : 584400, avg score : 21.2
Step # : 585600, avg score : 22.2
Step # : 586800, avg score : 20.8
Step # : 588000, avg score : 22.8
Step # : 589200, avg score : 22.2
Step # : 590400, avg score : 22.8
Step # : 591600, avg score : 22.8
Step # : 592800, avg score : 20.6
Step # : 594000, avg score : 18.8
Step # : 595200, avg score : 20.4
Step # : 596400, avg score : 21.6
Step # : 597600, avg score : 22.2
Step # : 598800, avg score : 23.8
Step # : 600000, avg score : 21.8
Step # : 601200, avg score : 24.4
Step # : 602400, avg score : 22.0
Step # : 603600, avg score : 25.6
Step # : 604800, avg score : 23.2
Step # : 606000, avg score : 24.0
Step # : 607200, avg score : 23.4
Step # : 608400, avg score : 21.8
Step # : 609600, avg score : 26.0
Step # : 610800, avg score : 22.4
Step # : 612000, avg score : 22.0
Step # : 613200, avg score : 21.4
Step # : 614400, avg score : 22.0
Step # : 615600, avg score : 22.4
Step # : 616800, avg score : 22.0
Step # : 61800

In [26]:
%%time
results = train_batched_A2C(trained_agent, game_params, map_name, lr, 
                            obs_proc_params=obs_proc_params, action_dict=action_dict, **train_dict)

Process ID:  VVLG
Step # : 6000, avg score : 99.8
Step # : 12000, avg score : 94.0
Step # : 18000, avg score : 81.4
Step # : 24000, avg score : 99.6
Step # : 30000, avg score : 99.4
Step # : 36000, avg score : 95.0
Step # : 42000, avg score : 99.0
Step # : 48000, avg score : 95.2
Step # : 54000, avg score : 94.0
Step # : 60000, avg score : 96.0
Step # : 66000, avg score : 95.8
Step # : 72000, avg score : 98.2
Step # : 78000, avg score : 93.8
Step # : 84000, avg score : 101.6
Step # : 90000, avg score : 93.2
Step # : 96000, avg score : 98.0
Step # : 102000, avg score : 93.6
Step # : 108000, avg score : 100.6
Step # : 114000, avg score : 96.0
Step # : 120000, avg score : 82.6
Step # : 126000, avg score : 89.6
Step # : 132000, avg score : 97.8
Step # : 138000, avg score : 101.6
Step # : 144000, avg score : 99.2
Step # : 150000, avg score : 97.8
Step # : 156000, avg score : 92.0
Step # : 162000, avg score : 96.2
Step # : 168000, avg score : 91.6
Step # : 174000, avg score : 99.0
Step # : 1

In [10]:
score, losses, trained_agent, PID = results

In [11]:
from Utils import utils
save = True
keywords = ['A2C', 'CMS','embed-action',"conv-net",'lr-7e-4','20-steps', '32x32',"1.2M-env-steps","120-unroll-len",'7-channels', 'select-point'] 

if save:
    save_dir = '../Results/CollectMineralShards/'
    keywords.append(PID)
    filename = '_'.join(keywords)
    filename = 'S_'+filename
    print("Save at "+save_dir+filename)
    train_session_dict = dict(game_params=game_params, HPs=HPs, score=score, n_epochs=len(score), keywords=keywords, losses=losses)
    np.save(save_dir+filename, train_session_dict)
    torch.save(trained_agent, save_dir+"agent_"+PID)
else:
    print("Nothing saved")
    pass

Save at ../Results/CollectMineralShards/S_A2C_CMS_embed-action_conv-net_lr-7e-4_20-steps_32x32_1.2M-env-steps_120-unroll-len_7-channels_select-point_VYPP


  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


# FiLM layer for action conditioning

How I condition actions right now:
- action = (b,)
- embedded_a = (b,d), d is embedding dimension (e.g. 8)
- for nonspatial features (b, n_features) just concatenate the embedded action: (b, n_features+d)
- for spatial features (b, n_channels, L, L) broadcast (actually repeat) the embedded action to (b,d,L,L) and then concatenate along the channel dimension: (b, n_channels+d, L, L) 

Then I use them as always, so processing from 1 or 2 layers before applying the softmax and sampling the argument.

With FiLM for spatial arguments:
- still use embedding
- use ReLU + linear layer to extract $\gamma_c$ and $\beta_c$ for each channel c in n_channels
- transform the spatial features as $\gamma_c F_c(x) + \beta_c$
- apply final convolution layer
- apply softmax

Same thing can be done for nonspatial arguments, assuming them as 1x1 images. So in this case the FiLM layer would be much more expressive, because it can achieve any possible output by means of scaling and shifting, so maybe is no more a good idea and a simple concatenation of the embedding action could do.

# Tests

In [16]:
class C1():
    def __init__(self):
        self._init_stuff()
        self.C = 10
        
    def _init_stuff(self):
        self.N = 10

In [17]:
class C2(C1):
    def __init__(self):
        self.embed = 10
        super().__init__()
        print(self.N)
        print(self.C)
        
    def _init_stuff(self):
        self.N = 10 + self.embed

In [18]:
C2()

20
10


<__main__.C2 at 0x7fa00f0bbf50>

In [16]:
B = 2
n_channels = 12
n_features = 256
embed_dim = 16
res = 32
spatial = torch.rand(B, n_channels, res, res)
nonspatial = torch.rand(B, n_features)
embedded_a = torch.rand(B, embed_dim)


In [29]:
print('embedded_a.shape; ', embedded_a.shape)
spatial_a = embedded_a.reshape((embedded_a.shape[:2]+(1,1,))).repeat(1,1,res,res)
for i in range(res):
    for j in range(res):
        print(torch.all(spatial_a[:,:,i,j] == embedded_a)) # copied correctly if True

embedded_a.shape;  torch.Size([2, 16])
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)

In [30]:
cat_spatial = torch.cat([spatial, spatial_a], dim=1)

In [31]:
cat_spatial.shape

torch.Size([2, 28, 32, 32])

In [32]:
cat_nonspatial = torch.cat([nonspatial, embedded_a], dim=1)
cat_nonspatial.shape

torch.Size([2, 272])