# Multi Game training

## Version 1: Introduce ObsProcesser and merge screen and minimap layers - OK
## Version 2: Introduce custom action space

Still to do:
1. Extend and customize action space (not just no-op, select-army and attack-screen)
2. Generalize to screen and minimap with different resolutions

In [1]:
import sys
sys.path.insert(0, "../")
from SC_Utils.game_utils import ObsProcesser, get_action_dict
from SC_Utils.train_v2 import *
from AC_modules.BatchedA2C import SpatialA2C
import AC_modules.Networks as net
import torch

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
# --- Environment configuration ---
RESOLUTION = 32
MAX_STEPS = 256
# Screen and minimap feature layers share the same resolution.
game_params = {
    'feature_screen': RESOLUTION,
    'feature_minimap': RESOLUTION,
    'action_space': "FEATURES",
}
# Available games, numbered from 1 in the order listed.
game_names = dict(enumerate(
    ['MoveToBeacon',
     'CollectMineralShards',
     'DefeatRoaches',
     'FindAndDefeatZerglings',
     'DefeatZerglingsAndBanelings',
     'CollectMineralsAndGas',
     'BuildMarines'],
    start=1))
map_name = game_names[1]

# --- Observation processer configuration ---
# Screen layers handed to ObsProcesser; no minimap layers are selected.
screen_names = [
    'visibility_map',
    'player_relative',
    'selected',
    'unit_density',
    'unit_density_aa',
]
minimap_names = []
obs_proc_params = dict(screen_names=screen_names, minimap_names=minimap_names)
#obs_proc_params = {'select_all':True}

In [3]:
# Create the game environment for the selected map.
env = init_game(game_params, map_name)

# Build the observation processer from the layer selection above and ask it
# how many channels it yields for the screen and for the minimap.
op = ObsProcesser(**obs_proc_params)
screen_channels, minimap_channels = op.get_n_channels()

# Screen and minimap channel counts are added to give the network input size.
in_channels = sum((screen_channels, minimap_channels))

In [4]:
# Names of the actions the agent may choose from.
action_names = [
    'no_op',
    'select_army',
    'Attack_screen',
    'Move_screen',
]
#action_names = ['no_op', 'select_army', 'Attack_screen', 'Move_screen', 'select_point', 'select_rect']

# The size of the action space is the number of entries in the action dictionary.
action_dict = get_action_dict(action_names)
action_space = len(action_dict)

In [5]:
# Define the two models used in the architecture.
#
# `model_number` picks one of the entries of `model_names`; the matching branch
# below sets the spatial / non-spatial network classes, their constructor
# keyword dictionaries, and the `n_channels` / `n_features` sizes used later
# when the agent is built.
model_number = 0
model_names = {0:"FullyConvNet_v0", 1:"FullyConvNet_v1", 2:"FullyConvNet_V2", 3:"ControlNet", 4:"GatedRelationalNet"}

# Fail fast on an unknown model number. Merely printing a message would leave
# spatial_model & co. undefined (or stale from a previous run of this cell),
# and the error would only surface later when the agent is constructed.
if model_number not in model_names:
    raise ValueError("Model number not available. Choose one of: "
                     + ", ".join(str(k) for k in sorted(model_names)) + ".")

print(model_names[model_number]+" selected.")

# NOTE(review): only model 0 uses `in_channels` computed from the observation
# processer; models 1-4 hard-code in_channels (3 or 6). Confirm these values
# match the layers selected in obs_proc_params before switching model.
if model_number == 0:
    spatial_model = net.FullyConvSpatial
    nonspatial_model = net.FullyConvNonSpatial
    n_channels = 32
    n_features = 256
    spatial_dict = {"in_channels":in_channels}
    nonspatial_dict = {'resolution':RESOLUTION, 'kernel_size':3, 'stride':2}

elif model_number == 1:
    spatial_model = net.FullyConvSpatial_v1
    nonspatial_model = net.FullyConvNonSpatial_v1
    n_channels = 32
    n_features = 256
    spatial_dict = {"in_channels":3, 'resolution':RESOLUTION}
    nonspatial_dict = {'resolution':RESOLUTION}

elif model_number == 2:
    spatial_model = net.FullyConvSpatial_v2
    nonspatial_model = net.FullyConvNonSpatial_v2
    n_channels = 32
    n_features = 256
    spatial_dict = {"in_channels":6, 'resolution':RESOLUTION}
    nonspatial_dict = {'resolution':RESOLUTION}

elif model_number == 3:
    spatial_model = net.SpatialFeatures
    nonspatial_model = net.NonSpatialFeatures
    n_channels = 36
    n_features = 36
    spatial_dict = dict(n_layers=1, linear_size=RESOLUTION, in_channels=6, n_channels=n_features)
    nonspatial_dict = dict(linear_size=RESOLUTION, n_channels=n_features, pixel_hidden_dim=128, pixel_n_residuals=2,
                 feature_hidden_dim=64, feature_n_residuals=2)

elif model_number == 4:
    spatial_model = net.SpatialFeatures
    nonspatial_model = net.GatedRelationalNet
    n_channels = 32
    n_features = 32
    spatial_dict = dict(n_layers=2, linear_size=RESOLUTION, in_channels=3, n_channels=n_features)
    nonspatial_dict = dict(n_kernels=n_features, n_features=n_features, n_heads=1, n_attn_modules=2,
                     feature_hidden_dim=16, feature_n_residuals=1)

FullyConvNet_v0 selected.


In [6]:
# Hyper-parameters; the whole dictionary is forwarded to the SpatialA2C
# constructor as keyword arguments.
HPs = {
    'action_space': action_space,
    'gamma': 0.99,
    'n_steps': 20,
    'H': 1e-3,
    'spatial_model': spatial_model,
    'nonspatial_model': nonspatial_model,
    'n_features': n_features,
    'n_channels': n_channels,
    'spatial_dict': spatial_dict,
    'nonspatial_dict': nonspatial_dict,
    'action_dict': action_dict,
}

# Use the GPU whenever CUDA is available, otherwise fall back to the CPU.
HPs['device'] = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Using device "+HPs['device'])

# `lr` is not part of HPs: it is handed to train_batched_A2C in the training cell.
lr = 7e-4
agent = SpatialA2C(env=env, **HPs)

Using device cuda


In [7]:
# Training schedule; both the step budget and the test interval are expressed
# as multiples of the unroll length.
unroll_length = 120

train_dict = {
    'n_train_processes': 11,
    'max_train_steps': 500 * unroll_length,
    'unroll_length': unroll_length,
    'test_interval': 10 * unroll_length,  # original inline note: 100
}

In [8]:
%%time
# Run batched A2C training; the schedule (number of processes, step budget,
# unroll length, test interval) is supplied through train_dict.
results = train_batched_A2C(
    agent, game_params, map_name, lr,
    obs_proc_params=obs_proc_params,
    action_dict=action_dict,
    **train_dict
)

Process ID:  QPGK
Step # : 1200, avg score : 1.2
Step # : 2400, avg score : 0.8
Step # : 3600, avg score : 2.2
Step # : 4800, avg score : 2.8
Step # : 6000, avg score : 7.8
Step # : 7200, avg score : 20.8
Step # : 8400, avg score : 24.8
Step # : 9600, avg score : 24.6
Step # : 10800, avg score : 25.2
Step # : 12000, avg score : 24.2
Step # : 13200, avg score : 25.0
Step # : 14400, avg score : 24.6


Process Process-8:
Process Process-1:
Process Process-11:
Process Process-9:
Process Process-4:
Process Process-5:
Process Process-2:
Process Process-10:
Process Process-7:
Process Process-3:
Process Process-6:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/m/work/modules/automatic/anaconda/envs/aalto-ubuntu1804-generic/software/anaconda/2020-01-tf2/5a34a04a/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/m/work/modules/automatic/anaconda/envs/aalto-ubuntu1804-generic/software/anaconda/2020-01-tf2/5a34a04a/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/m/work/modules/automatic/anaconda/envs/aalto-ubuntu1804-generic/software/anaconda/2020-01-tf2/5a34a04a/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/m/work/modules/automatic/anaconda/e

  File "/m/work/modules/automatic/anaconda/envs/aalto-ubuntu1804-generic/software/anaconda/2020-01-tf2/5a34a04a/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/m/work/modules/automatic/anaconda/envs/aalto-ubuntu1804-generic/software/anaconda/2020-01-tf2/5a34a04a/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/m/work/modules/automatic/anaconda/envs/aalto-ubuntu1804-generic/software/anaconda/2020-01-tf2/5a34a04a/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "../SC_Utils/train_v2.py", line 70, in worker
    cmd, data = worker_end.recv()
  File "../SC_Utils/train_v2.py", line 70, in worker
    cmd, data = worker_end.recv()
Traceback (most recent call last):
  File "/m/work/modules/automatic/anaconda/envs/aalto-ubuntu1804-generic/software/anaconda/2020-01-tf2/5a34a04a/lib/python3.7/multiprocessing/connection.py", line 

Traceback (most recent call last):
  File "/m/work/modules/automatic/anaconda/envs/aalto-ubuntu1804-generic/software/anaconda/2020-01-tf2/5a34a04a/lib/python3.7/site-packages/IPython/core/magics/execution.py", line 1312, in time
    exec(code, glob, local_ns)
  File "<timed exec>", line 2, in <module>
  File "../SC_Utils/train_v2.py", line 222, in train_batched_A2C
    loss.backward()
  File "/m/work/modules/automatic/anaconda/envs/aalto-ubuntu1804-generic/software/anaconda/2020-01-tf2/5a34a04a/lib/python3.7/site-packages/torch/tensor.py", line 166, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "/m/work/modules/automatic/anaconda/envs/aalto-ubuntu1804-generic/software/anaconda/2020-01-tf2/5a34a04a/lib/python3.7/site-packages/torch/autograd/__init__.py", line 99, in backward
    allow_unreachable=True)  # allow_unreachable flag
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call 

KeyboardInterrupt: 

In [8]:
# Unpack the four values returned by train_batched_A2C.
# PID is presumably the "Process ID" string printed at the start of training
# (it is used below to tag the saved files) - TODO confirm.
# NOTE(review): `results` is only bound if the training cell ran to completion;
# after an interrupted run this unpacks whatever an earlier run left behind.
score, losses, trained_agent, PID = results

In [9]:
from Utils import utils
import os

# Set to False to skip writing anything to disk.
save = True
# Tags joined with '_' to form the name of the saved training session.
# NOTE(review): 'CMS' looks like it refers to CollectMineralShards while
# map_name currently selects MoveToBeacon - confirm the tags describe this run.
keywords = ['A2C', 'CMS',"conv-net",'lr-7e-4','20-steps', '32x32',"120k-env-steps","120-unroll-len",'32-channels'] 

if save:
    # Store results under the directory of the map that was actually trained
    # on, instead of a hard-coded map name.
    save_dir = '../Results/' + map_name + '/'
    # np.save / torch.save do not create missing directories.
    os.makedirs(save_dir, exist_ok=True)
    # The run identifier makes the file name unique per training session.
    keywords.append(PID)
    filename = 'S_' + '_'.join(keywords)
    print("Save at "+save_dir+filename)
    # Training session summary: configuration, scores and losses.
    train_session_dict = dict(game_params=game_params, HPs=HPs, score=score, n_epochs=len(score), keywords=keywords, losses=losses)
    np.save(save_dir+filename, train_session_dict)
    # The trained agent object is saved separately, tagged with the same run identifier.
    torch.save(trained_agent, save_dir+"agent_"+PID)
else:
    print("Nothing saved")

Save at ../Results/MoveToBeacon/S_A2C_CMS_conv-net_lr-7e-4_20-steps_32x32_120k-env-steps_120-unroll-len_32-channels_XBCD


  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
