# Canonical Evolution Strategies for Atari Games

## Imports

In [1]:
from model import DQN
from AtariCES import AtariCES as CAF

  f"Custom namespace `{spec.namespace}` is being overridden "


## Parameters

### Game Parameters

In [2]:
# Step budget; forwarded as `max_step` both when the strategy object is built
# and when the final result is shown.
max_step = 2500
# Second positional constructor argument; presumably toggles on-screen
# rendering during evaluation — TODO confirm against AtariCES.
render = False
# Name of the Atari title, passed as the first positional constructor argument.
game = "SpaceInvaders"

### Evolution Strategy Parameters

In [3]:
# Hyperparameters handed by keyword to the AtariCES constructor.
# Selection scheme; options noted by the author: "topn", "random", "tournament".
parent_selection = "tournament"
# Number of iterations to run (the recorded run shows iterations 1 through 10).
iterations = 10
# Presumably the number of parents kept per iteration — TODO confirm.
n_parents = 5
# Candidates evaluated per iteration (the recorded progress bars count to 25).
n_offspring = 25
# Presumably the mutation noise scale — TODO confirm against AtariCES.
sigma = 0.3

## Initiate Environment

In [4]:
# The import cell aliases the AtariCES class as `CAF`, and this cell then
# rebinds `CAF` to an instance. Re-running the cell in the same kernel would
# therefore call the instance instead of the class. Importing the class under
# its own name here keeps the cell re-runnable while leaving `CAF` bound to
# the instance that the later cells use.
from AtariCES import AtariCES

# Build the strategy object from the game and evolution strategy parameters.
CAF = AtariCES(
    game,
    render,
    max_step=max_step,
    sigma=sigma,
    n_parents=n_parents,
    n_offspring=n_offspring,
    iterations=iterations,
    parent_selection=parent_selection,
)

# Create the environment and unpack the four values it reports back.
env, n_actions, actions_meanings, state_dim = CAF.initiate_env()


### Environment Details

In [5]:
# Report the values returned by `initiate_env()`, one per line.
print(
    f"Number of actions: {n_actions}",
    f"Action meanings: {actions_meanings}",
    f"State dimensions: {state_dim}",
    sep="\n",
)

Number of actions: 6
Action meanings: ['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']
State dimensions: (4, 84, 84)


## Initiate Model

In [6]:
# Build the network with one output per available action.
model = DQN(n_actions=n_actions)
# The recorded output is a Keras-format summary table. Keras' `summary()`
# prints the table itself and returns None, so wrapping it in `print()` only
# appends a stray "None" line; call it directly instead.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 20, 20, 32)        8224      
                                                                 
 batch_normalization_5 (Batc  (None, 20, 20, 32)       128       
 hNormalization)                                                 
                                                                 
 activation_5 (Activation)   (None, 20, 20, 32)        0         
                                                                 
 conv2d_4 (Conv2D)           (None, 9, 9, 64)          32832     
                                                                 
 batch_normalization_6 (Batc  (None, 9, 9, 64)         256       
 hNormalization)                                                 
                                                                 
 activation_6 (Activation)   (None, 9, 9, 64)         

In [7]:
# Hand the `model` object to the strategy object before the run is started.
CAF.set_model(model)

## Run

In [8]:
# Run the evolution strategy and keep its two return values.
# Long-running: in the recorded run each iteration (25 evaluations) took
# roughly 7-9 minutes, over 10 iterations.
# `theta` is later passed to `show_last`; presumably it holds the final
# parameters — TODO confirm against AtariCES.CES.
theta, rewards = CAF.CES()

Iteration:  1


100%|██████████| 25/25 [09:06<00:00, 21.88s/it]


[   0.    0.    0.    0.    0.    0. 1315.    0.    0.    0.    0. 1315.
    0. 1315.    0. 1315.    0.  960. 1315. 1315. 1315.  960. 1315.  960.
    0.]
best reward: 1315.0
Iteration:  2


100%|██████████| 25/25 [08:58<00:00, 21.54s/it]


[1315. 1080. 1315.    0. 1315.    0.    0. 1315. 1315. 1315. 1315.    0.
 1315. 1315.  960. 1315.    0. 1315. 1315.  960. 1315.    0.    0.    0.
    0.]
best reward: 1315.0
Iteration:  3


100%|██████████| 25/25 [08:08<00:00, 19.52s/it]


[1315.  960.  960. 1315. 1315. 1315. 1315. 1315. 1315.  960. 1315. 1315.
 1315. 1315. 1315. 1315. 1315.    0.    0. 1315. 1315. 1315. 1315. 1315.
 1315.]
best reward: 1315.0
Iteration:  4


100%|██████████| 25/25 [07:11<00:00, 17.25s/it]


[1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315.    0.    0. 1315. 1315.
 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315.
 1315.]
best reward: 1315.0
Iteration:  5


100%|██████████| 25/25 [07:08<00:00, 17.13s/it]


[1315.    0.    0. 1315.    0.    0. 1315. 1315. 1315. 1315.  960. 1315.
 1315. 1315.    0. 1315. 1315. 1315.    0.    0.    0.    0. 1315. 1315.
 1315.]
best reward: 1315.0
Iteration:  6


100%|██████████| 25/25 [07:15<00:00, 17.42s/it]


[1315. 1315. 1315. 1315.    0. 1315.    0. 1315.    0. 1315.    0. 1315.
 1315. 1315. 1315. 1315.    0. 1315. 1315. 1315. 1315. 1315. 1315. 1315.
 1315.]
best reward: 1315.0
Iteration:  7


100%|██████████| 25/25 [07:14<00:00, 17.40s/it]


[1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315.
 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315.
 1315.]
best reward: 1315.0
Iteration:  8


100%|██████████| 25/25 [07:03<00:00, 16.96s/it]


[1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315.
 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315.
 1315.]
best reward: 1315.0
Iteration:  9


100%|██████████| 25/25 [07:10<00:00, 17.20s/it]


[1315. 1315. 1315. 1315. 1315. 1315. 1315. 1440. 1315. 1315. 1315.  960.
 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315.
 1315.]
best reward: 1440.0
Iteration:  10


100%|██████████| 25/25 [07:03<00:00, 16.94s/it]

[1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315.
 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315. 1315.
 1315.]
best reward: 1315.0





## Show Final Generation

In [9]:
# Show the result for the `theta` returned by `CES()`, using the same
# `max_step` value that was given to the constructor.
# NOTE(review): presumably this replays an episode with rendering — confirm.
CAF.show_last(theta, max_step=max_step)

Finished
