# A2C derived classes

In [1]:
import sys
sys.path.insert(0, "../")
from SC_Utils.game_utils import ObsProcesser, get_action_dict
from SC_Utils.train_v2 import *
from AC_modules.BatchedA2C import SpatialA2C
import AC_modules.Networks as net
import torch

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
from AC_modules.ActorCriticArchitecture import *

class ActionDependentA2C(SpatialA2C):
    """
    A2C agent whose action arguments are conditioned on the sampled action.

    At every step the action id is sampled first; its learned embedding is then
    concatenated to both the spatial and the nonspatial features, and the arguments
    of the action are sampled from those augmented features.
    """

    def __init__(self, action_space, env, spatial_model, nonspatial_model, 
                 spatial_dict,  nonspatial_dict, n_features, n_channels, embed_dim,
                 gamma, action_dict=None, H=1e-3, n_steps=20, device='cpu'):
        """
        Store the hyperparameters and build the actor-critic network on `device`.

        action_space, env, spatial_model, nonspatial_model, spatial_dict, nonspatial_dict,
        n_features, n_channels, action_dict and embed_dim are forwarded unchanged to
        SpatialActorCritic_v1.
        gamma, H and n_steps are only stored on the instance here (presumably discount
        factor, entropy coefficient and n-step horizon used by the parent class -- TODO confirm).
        """
        # Do not use super().__init__(): the attributes are set by hand so that the
        # network built here is SpatialActorCritic_v1 (which receives embed_dim).
        self.gamma = gamma
        self.n_actions = action_space
        self.n_steps = n_steps
        self.H = H
        self.AC = SpatialActorCritic_v1(action_space, env, spatial_model, nonspatial_model, spatial_dict, 
                                     nonspatial_dict, n_features, n_channels, action_dict, embed_dim)
        self.device = device 
        self.AC.to(self.device)
        
    def step(self, state, action_mask):
        """
        Sample one action (id + arguments) for every sample in the batch.

        state: numpy array with the batched observations (converted with torch.from_numpy).
        action_mask: array-like mask forwarded to self.AC.pi together with the state.

        Returns
        -------
        action: list of actions.FunctionCall, one per sample
        log_prob: log probability of the action id plus the log probabilities of its arguments
        entropy: mean over the batch of (action entropy + arguments entropy)
        """
        state = torch.from_numpy(state).float().to(self.device)
        action_mask = torch.tensor(action_mask).to(self.device)
        
        log_probs, spatial_features, nonspatial_features = self.AC.pi(state, action_mask)
        probs = torch.exp(log_probs)
        entropy = self.compute_entropy(probs)
        a = Categorical(probs).sample()
        a = a.detach().cpu().numpy()
        embedded_a = self._embed_action(a)
        
        # Log probability of the sampled action id, one entry per sample
        log_prob = log_probs[range(len(a)), a]
        
        # Concatenate embedded action to spatial and nonspatial features
        spatial_features = self._cat_action_to_spatial(embedded_a, spatial_features)
        nonspatial_features = self._cat_action_to_nonspatial(embedded_a, nonspatial_features)
        
        args, args_log_prob, args_entropy = self.get_arguments(spatial_features, nonspatial_features, a)
        log_prob = log_prob + args_log_prob
        entropy = entropy + args_entropy

        # Translate the network's action indices into the ids stored in action_dict
        action_id = np.array([self.AC.action_dict[act] for act in a])
        action = [actions.FunctionCall(act_id, act_args) for act_id, act_args in zip(action_id, args)]

        return action, log_prob, torch.mean(entropy)

    def get_arguments(self, spatial_features, nonspatial_features, action):
        """
        Samples all possible arguments for each sample in the batch, then selects only those that
        apply to the selected actions and returns a list containing the list of arguments for every 
        sampled action, the logarithm of the probability of sampling those arguments and the entropy 
        of their distributions. If an action has more arguments the log probs and the entropies returned
        are the sum of all those of the single arguments.

        spatial_features: features used to sample the 'spatial' arguments
        nonspatial_features: features used to sample the 'categorical' arguments
        action: iterable with the sampled action index of every sample in the batch

        Returns (args, args_log_prob, args_entropy); the two tensors keep one entry per
        sample, also when the batch contains a single sample.

        Raises ValueError if an argument type is neither 'categorical' nor 'spatial'.
        """
        ### Sample and store each argument with its log prob and entropy ###
        results = {}
        for arg_name in self.AC.arguments_dict:
            arg_type = self.AC.arguments_type[arg_name]
            if arg_type == 'categorical':
                features = nonspatial_features
            elif arg_type == 'spatial':
                features = spatial_features
            else:
                raise ValueError("argument type for "+arg_name+" not understood")
            arg_sampled, log_prob, probs = self.AC.sample_param(features, arg_name)
            results[arg_name] = (arg_sampled, log_prob, self.compute_entropy(probs))
           
        ### For every action get the list of arguments and their log prob and entropy ###
        args, args_log_prob, args_entropy = [], [], []
        for i, a in enumerate(action):
            # default return values if no argument is sampled (like if there was a single value obtained with p=1)
            arg = []
            arg_log_prob = torch.zeros(1, dtype=torch.float32, device=self.device)
            entropies = torch.zeros(1, dtype=torch.float32, device=self.device)
            
            for arg_name in self.AC.act_to_arg_names[a]:
                # i selects the sample inside the batch for each of the stored results
                arg_sampled, log_prob, entropy = results[arg_name]
                arg.append(list(arg_sampled[i]))
                arg_log_prob = arg_log_prob + log_prob[i] # sum log_probs
                entropies = entropies + entropy[i] # sum entropies
            args.append(arg)
            args_log_prob.append(arg_log_prob) 
            args_entropy.append(entropies)
            
        args_log_prob = self._stack_per_sample(args_log_prob)
        args_entropy = self._stack_per_sample(args_entropy)
        return args, args_log_prob, args_entropy

    @staticmethod
    def _stack_per_sample(per_sample):
        """
        Stack a list of per-sample tensors along a new batch axis and drop singleton axes.
        A bare squeeze() would also drop the batch axis when the batch has a single
        sample, so in that case the batch axis is restored.
        """
        stacked = torch.stack(per_sample, dim=0).squeeze()
        if stacked.dim() == 0:
            stacked = stacked.reshape(1)
        return stacked
    
    def _embed_action(self, action):
        """ Map the sampled action indices to their embeddings through self.AC.embedding. """
        a = torch.as_tensor(action, dtype=torch.long, device=self.device)
        return self.AC.embedding(a)
    
    def _cat_action_to_spatial(self, embedded_action, spatial_repr):
        """ 
        Assume spatial_repr of shape (B, n_channels, height, width).
        Cast embedded_action from (B, embedd_dim) to (B, embedd_dim, height, width)
        Concatenate spatial_repr with the broadcasted embedded action along the channel dim.
        Height and width are read separately, so non-square maps are supported too.
        """
        height, width = spatial_repr.shape[-2:]
        batch_size, embed_dim = embedded_action.shape[:2]
        # expand() only creates a broadcasted view; torch.cat materializes the copy
        spatial_a = embedded_action.reshape(batch_size, embed_dim, 1, 1).expand(-1, -1, height, width)
        return torch.cat([spatial_repr, spatial_a], dim=1)
    
    def _cat_action_to_nonspatial(self, embedded_action, nonspatial_repr):
        """
        nonspatial_repr: (B, n_features)
        embedded_action: (B, embedd_dim)
        Concatenate them so that the result is of shape (B, n_features+embedd_dim)
        """
        return torch.cat([nonspatial_repr, embedded_action], dim=1)

In [6]:
# Environment parameters
RESOLUTION = 32
MAX_STEPS = 256
# Screen and minimap feature layers share the same resolution
game_params = {
    'feature_screen': RESOLUTION,
    'feature_minimap': RESOLUTION,
    'action_space': "FEATURES",
}
# Available map names, indexed from 1
game_names = dict(enumerate(
    [
        'MoveToBeacon',
        'CollectMineralShards',
        'DefeatRoaches',
        'FindAndDefeatZerglings',
        'DefeatZerglingsAndBanelings',
        'CollectMineralsAndGas',
        'BuildMarines',
    ],
    start=1,
))
map_name = game_names[2]

# Observation Processer parameters
screen_names = [
    'visibility_map',
    'player_relative',
    'selected',
    'unit_density',
    'unit_density_aa',
]
minimap_names = []
obs_proc_params = dict(screen_names=screen_names, minimap_names=minimap_names)
#obs_proc_params = {'select_all':True}

In [7]:
# Start the game on the selected map (max_steps presumably caps the episode length -- confirm in init_game)
env = init_game(game_params, map_name, max_steps=MAX_STEPS)
# The observation processer reports how many screen / minimap layers it outputs
op = ObsProcesser(**obs_proc_params)
screen_channels, minimap_channels = op.get_n_channels()
# Total number of input channels, later passed to the spatial model as "in_channels"
in_channels = screen_channels + minimap_channels 

In [8]:
# Actions available to the agent; the commented line is a smaller alternative set
#action_names = ['no_op', 'select_army', 'Attack_screen', 'Move_screen']
action_names = ['no_op', 'select_army', 'Attack_screen', 'Move_screen', 'select_point', 'select_rect']
action_dict = get_action_dict(action_names)
# Size of the action space = number of entries in action_dict
action_space = len(action_dict)

In [9]:
# Network classes used for the spatial and nonspatial branches
spatial_model, nonspatial_model = net.FullyConvSpatial, net.FullyConvNonSpatial
# Size of the action embedding, of the spatial channels and of the nonspatial features
embed_dim, n_channels, n_features = 8, 32, 256
# Keyword arguments for the two models
spatial_dict = dict(in_channels=in_channels)
nonspatial_dict = dict(resolution=RESOLUTION, kernel_size=3, stride=2)

In [10]:
# Keyword arguments for the agent constructor (everything except env)
HPs = {
    'action_space': action_space,
    'gamma': 0.99,
    'n_steps': 20,
    'H': 1e-3,
    'spatial_model': spatial_model,
    'nonspatial_model': nonspatial_model,
    'n_features': n_features,
    'n_channels': n_channels,
    'spatial_dict': spatial_dict,
    'nonspatial_dict': nonspatial_dict,
    'action_dict': action_dict,
    'embed_dim': embed_dim,
}

# Use the GPU whenever one is available
HPs['device'] = 'cuda' if torch.cuda.is_available() else 'cpu'

print("Using device "+HPs['device'])

# Learning rate passed to the training routine
lr = 7e-4

Using device cuda


In [11]:
# Build the agent: env is passed explicitly, every other argument (device included) comes from HPs
agent = ActionDependentA2C(env=env, **HPs)

In [12]:
unroll_length = 120

# Training settings; step budgets are expressed as multiples of the unroll length
train_dict = {
    'n_train_processes': 11,
    'max_train_steps': unroll_length * 10000,
    'unroll_length': unroll_length,
    'max_episode_steps': MAX_STEPS,
    'test_interval': unroll_length * 50,  # 100
}

In [13]:
%%time
# First training run on the freshly built agent; `results` is unpacked in a later cell
# as (score, losses, trained_agent, PID)
results = train_batched_A2C(agent, game_params, map_name, lr, 
                            obs_proc_params=obs_proc_params, action_dict=action_dict, **train_dict)

Process ID:  YOEI
Step # : 6000, avg score : 19.4
Step # : 12000, avg score : 24.0
Step # : 18000, avg score : 20.8
Step # : 24000, avg score : 28.6
Step # : 30000, avg score : 27.2
Step # : 36000, avg score : 26.4
Step # : 42000, avg score : 26.4
Step # : 48000, avg score : 29.0
Step # : 54000, avg score : 27.8
Step # : 60000, avg score : 22.2
Step # : 66000, avg score : 26.2
Step # : 72000, avg score : 27.6
Step # : 78000, avg score : 29.0
Step # : 84000, avg score : 22.0
Step # : 90000, avg score : 26.0
Step # : 96000, avg score : 26.4
Step # : 102000, avg score : 31.4
Step # : 108000, avg score : 30.0
Step # : 114000, avg score : 31.4
Step # : 120000, avg score : 25.8
Step # : 126000, avg score : 29.2
Step # : 132000, avg score : 35.6
Step # : 138000, avg score : 32.0
Step # : 144000, avg score : 38.6
Step # : 150000, avg score : 54.0
Step # : 156000, avg score : 39.6
Step # : 162000, avg score : 52.8
Step # : 168000, avg score : 52.4
Step # : 174000, avg score : 52.4
Step # : 1800

In [26]:
%%time
# Continue training from `trained_agent` (unpacked from a previous `results`) with the
# same settings; note that the cells were executed out of order (In [24] defines trained_agent)
results = train_batched_A2C(trained_agent, game_params, map_name, lr, 
                            obs_proc_params=obs_proc_params, action_dict=action_dict, **train_dict)

Process ID:  VVLG
Step # : 6000, avg score : 99.8
Step # : 12000, avg score : 94.0
Step # : 18000, avg score : 81.4
Step # : 24000, avg score : 99.6
Step # : 30000, avg score : 99.4
Step # : 36000, avg score : 95.0
Step # : 42000, avg score : 99.0
Step # : 48000, avg score : 95.2
Step # : 54000, avg score : 94.0
Step # : 60000, avg score : 96.0
Step # : 66000, avg score : 95.8
Step # : 72000, avg score : 98.2
Step # : 78000, avg score : 93.8
Step # : 84000, avg score : 101.6
Step # : 90000, avg score : 93.2
Step # : 96000, avg score : 98.0
Step # : 102000, avg score : 93.6
Step # : 108000, avg score : 100.6
Step # : 114000, avg score : 96.0
Step # : 120000, avg score : 82.6
Step # : 126000, avg score : 89.6
Step # : 132000, avg score : 97.8
Step # : 138000, avg score : 101.6
Step # : 144000, avg score : 99.2
Step # : 150000, avg score : 97.8
Step # : 156000, avg score : 92.0
Step # : 162000, avg score : 96.2
Step # : 168000, avg score : 91.6
Step # : 174000, avg score : 99.0
Step # : 1

In [24]:
# Unpack the training output: score, losses, the trained agent and the ID of the run
score, losses, trained_agent, PID = results

In [25]:
from Utils import utils
# Set to False to skip writing anything to disk
save = True
keywords = ['A2C', 'CMS','embed-action',"conv-net",'lr-7e-4','20-steps', '32x32',"1.2M-env-steps","120-unroll-len",'7-channels', 'YOEI-pt4'] 

if not save:
    print("Nothing saved")
else:
    save_dir = '../Results/CollectMineralShards/'
    # The run ID becomes the last keyword, hence the last part of the file name
    keywords.append(PID)
    filename = 'S_' + '_'.join(keywords)
    print(f"Save at {save_dir}{filename}")
    # Everything needed to inspect the session later, stored as a single dictionary
    train_session_dict = {
        'game_params': game_params,
        'HPs': HPs,
        'score': score,
        'n_epochs': len(score),
        'keywords': keywords,
        'losses': losses,
    }
    np.save(save_dir+filename, train_session_dict)
    # The trained agent is saved separately, named after the run ID
    torch.save(trained_agent, save_dir+"agent_"+PID)

Save at ../Results/CollectMineralShards/S_A2C_CMS_embed-action_conv-net_lr-7e-4_20-steps_32x32_1.2M-env-steps_120-unroll-len_7-channels_YOEI-pt4_JLJS


# Tests

In [16]:
class C1:
    """Base class of the test: the constructor delegates part of the setup to a hook."""

    def __init__(self):
        # The hook runs first, then C is assigned
        self._init_stuff()
        self.C = 10

    def _init_stuff(self):
        """Assign N; subclasses can override this hook."""
        self.N = 10

In [17]:
class C2(C1):
    """Subclass overriding the hook with a version that needs an attribute of its own."""

    def __init__(self):
        # embed has to exist before the parent constructor runs, because the parent
        # calls the overridden _init_stuff, which reads it
        self.embed = 10
        super().__init__()
        print(self.N, self.C, sep="\n")

    def _init_stuff(self):
        """Assign N, shifted by the value of embed."""
        self.N = self.embed + 10

In [18]:
# Instantiating C2 prints N and then C: the overridden _init_stuff is the one called by C1.__init__
C2()

20
10


<__main__.C2 at 0x7fa00f0bbf50>

In [16]:
# Sizes of the random tensors used to check the concatenation helpers
B, n_channels, n_features, embed_dim, res = 2, 12, 256, 16, 32

# Random stand-ins for the spatial features, nonspatial features and embedded action
spatial = torch.rand((B, n_channels, res, res))
nonspatial = torch.rand((B, n_features))
embedded_a = torch.rand((B, embed_dim))


In [29]:
print('embedded_a.shape; ', embedded_a.shape)
# Add two trailing singleton axes, then tile them over the res x res grid
spatial_a = embedded_a.reshape(embedded_a.shape[:2] + (1, 1))
spatial_a = spatial_a.repeat(1, 1, res, res)
# Every spatial location must hold an exact copy of the embedding
for i in range(res):
    for j in range(res):
        same_as_embedding = spatial_a[:, :, i, j] == embedded_a
        print(torch.all(same_as_embedding)) # copied correctly if True

embedded_a.shape;  torch.Size([2, 16])
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)
tensor(True)

In [30]:
# Append the tiled embedding to the spatial features along the channel axis (dim=1)
cat_spatial = torch.cat([spatial, spatial_a], dim=1)

In [31]:
# Channels should add up: n_channels + embed_dim = 12 + 16 = 28
cat_spatial.shape

torch.Size([2, 28, 32, 32])

In [32]:
# Same check for the nonspatial features: (B, n_features) + (B, embed_dim) -> (B, n_features + embed_dim)
cat_nonspatial = torch.cat([nonspatial, embedded_a], dim=1)
cat_nonspatial.shape

torch.Size([2, 272])