# Canonical Evolutionary Strategies for Atari Games

## Imports

In [1]:
from model import DQN
from AtariCES import AtariCES as CAF

  f"Custom namespace `{spec.namespace}` is being overridden "


## Parameters

### Game Parameters

In [2]:
# Name of the Atari game; passed as the first positional argument to the
# AtariCES constructor in the "Initiate Environment" cell.
game = "SpaceInvaders"
# Passed as the second positional argument to the AtariCES constructor.
# Presumably toggles on-screen rendering during evaluation - confirm in AtariCES.
render = False
# Forwarded as `max_step` to both the AtariCES constructor and `show_last`.
# Presumably the per-episode step cap - TODO confirm in AtariCES.
max_step = 2500

### Evolutionary Strategy Parameters

In [3]:
# Either a [start, end] pair or a single value.
# Presumably the ES noise scale, adapted according to `adaptive_type` - confirm in AtariCES.
sigma = [0.5, 0.01]

# Population settings forwarded to the AtariCES constructor.
n_offspring = 25
n_parents = 5
iterations = 10

# Parent selection scheme. Options: "topn", "random", "tournament"
parent_selection = "topn"

# Adaptation schedule type. Options: "constant", "linear", "exp", "log"
adaptive_type = "log"

## Initiate Environment

In [4]:
# Build the CES driver from the class under its own name.
# The previous `CAF = CAF(...)` rebound the imported class alias to an instance,
# so re-running this cell called the instance instead of the class (a TypeError
# unless the class happens to define `__call__`). Importing `AtariCES` here keeps
# the cell idempotent while `CAF` stays the instance name used by later cells.
from AtariCES import AtariCES

CAF = AtariCES(
    game,
    render,
    max_step=max_step,
    sigma=sigma,
    n_parents=n_parents,
    n_offspring=n_offspring,
    iterations=iterations,
    parent_selection=parent_selection,
    adaptive_type=adaptive_type,
)

# `initiate_env` returns the environment handle plus the action count, the
# action names and the state dimensions, which the following cells print and use.
env, n_actions, actions_meanings, state_dim = CAF.initiate_env()


### Environment Details

In [5]:
# Report what `initiate_env` returned, one item per line.
print(
    f"Number of actions: {n_actions}",
    f"Action meanings: {actions_meanings}",
    f"State dimensions: {state_dim}",
    sep="\n",
)

Number of actions: 6
Action meanings: ['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']
State dimensions: (4, 84, 84)


## Initiate Model

In [6]:
# Build the policy network with one output per available action.
model = DQN(n_actions=n_actions)

# The captured output is a Keras-style summary table. Keras `Model.summary()`
# prints the table itself and returns None, so wrapping it in `print()` only
# appended a stray "None" line after the table. Call it directly instead.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 20, 20, 32)        8224      
                                                                 
 batch_normalization_5 (Batc  (None, 20, 20, 32)       128       
 hNormalization)                                                 
                                                                 
 activation_5 (Activation)   (None, 20, 20, 32)        0         
                                                                 
 conv2d_4 (Conv2D)           (None, 9, 9, 64)          32832     
                                                                 
 batch_normalization_6 (Batc  (None, 9, 9, 64)         256       
 hNormalization)                                                 
                                                                 
 activation_6 (Activation)   (None, 9, 9, 64)         

In [7]:
# Hand the network built in the previous cell to the CES driver; this must run
# before `CAF.CES()` is called in the "Run" cell.
CAF.set_model(model)

## Run

In [8]:
# Run the CES loop. The logged run below evaluated 25 candidates per iteration
# for 10 iterations, at roughly 8 minutes per iteration, so expect a long wait.
# `theta` is later passed to `show_last`; presumably it holds the final
# parameters and `rewards` the collected rewards - confirm in AtariCES.
# NOTE(review): no random seed is set anywhere in this notebook, so the rewards
# printed below are not expected to reproduce exactly.
theta, rewards = CAF.CES()

Iteration:  1


100%|██████████| 25/25 [08:19<00:00, 19.99s/it]


[   0. 1140.   40.  200.   85.  910. 1180.  320.  460.  730.  675. 1315.
  660.  860.  780.  720.  895.  570. 1230. 1135.  430.    0. 1335.   95.
    0.]
best reward: 1335.0
Iteration:  2


100%|██████████| 25/25 [08:10<00:00, 19.61s/it]


[ 405.  480. 1060.    0.  705. 1350.  520.  610.  235.  945.  370.   90.
  540.  310.  850.  255.  935.    0.   25. 1355.  110.  365.    0.  915.
  390.]
best reward: 1355.0
Iteration:  3


100%|██████████| 25/25 [07:40<00:00, 18.42s/it]


[ 880.    0.  580.  560.  665.  735. 1000.  800.  250. 1055.  680.  250.
  420.  545.  720.    0.  910.  460.  885.  685.   60.   45.  945.  120.
  760.]
best reward: 1055.0
Iteration:  4


100%|██████████| 25/25 [07:25<00:00, 17.81s/it]


[   0.  705.  750.  755. 1090.  930. 1030. 1060.  890.   95.  850.  745.
  885.  960.  300.  655.  480.    0.  250.  740.  565.  125.  610. 1315.
  605.]
best reward: 1315.0
Iteration:  5


100%|██████████| 25/25 [08:08<00:00, 19.55s/it]


[  95.  780.   15.   10.  900.  615.  640.   15.  250.   90.    0. 1315.
  580.  300.    0.  950.  535.  160.   85.  435.  850.  560. 1310.  170.
  195.]
best reward: 1315.0
Iteration:  6


100%|██████████| 25/25 [08:04<00:00, 19.38s/it]


[ 950.  290.   45.  960.  760. 1465.  140.  195.  755.  230.  960.   15.
  920.  840.  210.  765.  780.  675.  530.  535.  550. 1045. 1050. 1295.
  785.]
best reward: 1465.0
Iteration:  7


100%|██████████| 25/25 [08:07<00:00, 19.52s/it]


[ 300. 1335.   10.  160.  905.  720. 1445.    0.  800.  635.  860.  670.
    0.  565.  285.  840.  325.  720.  495.  435.  575.  155.  405.    0.
  815.]
best reward: 1445.0
Iteration:  8


100%|██████████| 25/25 [07:54<00:00, 19.00s/it]


[ 960.  255. 1105.  755.  330.  310.  190.  975.   25. 1505.  180.  230.
  770.  685.  820.  900.  755.  955.  240. 1050.  335.  730.  630. 1325.
 1385.]
best reward: 1505.0
Iteration:  9


100%|██████████| 25/25 [07:58<00:00, 19.13s/it]


[  75.  440.  395.   15.   40.   40.  700.  840.  440. 1110.  720. 1060.
  630.  960.  370.  990. 1405.   10.  270.  105.  960.  510. 1110.   60.
 1020.]
best reward: 1405.0
Iteration:  10


100%|██████████| 25/25 [07:45<00:00, 18.62s/it]


[ 770.  405. 1315.  140. 1100.  715.  495. 1515.  350.  790.    0.  840.
   25.  695.  385. 1070.   40.  700.  160.    0.   95.    0.  640.  535.
  170.]
best reward: 1515.0


## Show Final Generation

In [9]:
# Show the result of the run using the `theta` returned by `CAF.CES()`, with the
# same `max_step` value that was given to the constructor.
# Presumably replays one episode with those parameters - confirm in AtariCES.
CAF.show_last(theta, max_step=max_step)

Finished
