# Canonical Evolutionary Strategies for Atari Games

## Imports

In [1]:
from AtariCES import AtariCES
from AtariCES import AtariCES as CAF
from model import DQN

## Parameters

### Game Parameters

In [2]:
# Game settings; all three are handed to the AtariCES constructor further down,
# and max_step is reused when the final generation is shown.
max_step = 2500
render = False
game = "SpaceInvaders"

### Evolutionary Strategy Parameters

In [3]:
# Evolutionary-strategy settings; each one is forwarded by keyword to the
# AtariCES constructor.
adaptive_type = 'log'  # one of: 'constant', 'linear', 'exp', 'log'
iterations = 10
n_parents = 5
n_offspring = 25
sigma = [0.5, 0.01]  # either a [start, end] pair or a single value

## Initiate Environment

In [4]:
# Instantiate through the class name rather than through `CAF` itself.
# The previous `CAF = CAF(...)` replaced the imported class alias with an
# instance, so running this cell a second time raised a TypeError (an instance
# is not callable). `CAF` is still the instance name the later cells rely on.
CAF = AtariCES(game, render, max_step=max_step, sigma=sigma,
               n_parents=n_parents, n_offspring=n_offspring,
               iterations=iterations, adaptive_type=adaptive_type)

# initiate_env() returns the environment plus the metadata printed in the next cell.
env, n_actions, actions_meanings, state_dim = CAF.initiate_env()


### Environment Details

In [5]:
# Report the environment metadata, one fact per output line.
print(
    f"Number of actions: {n_actions}",
    f"Action meanings: {actions_meanings}",
    f"State dimensions: {state_dim}",
    sep="\n",
)

Number of actions: 6
Action meanings: ['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']
State dimensions: (4, 84, 84)


## Initiate Model

In [6]:
# Build the network with one output per available action.
model = DQN(n_actions=n_actions)
# summary() prints the layer table itself and returns None, so it is called
# bare: wrapping it in print() appends a stray "None" line to the cell output.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 20, 20, 32)        8224      
                                                                 
 batch_normalization_5 (Batc  (None, 20, 20, 32)       128       
 hNormalization)                                                 
                                                                 
 activation_5 (Activation)   (None, 20, 20, 32)        0         
                                                                 
 conv2d_4 (Conv2D)           (None, 9, 9, 64)          32832     
                                                                 
 batch_normalization_6 (Batc  (None, 9, 9, 64)         256       
 hNormalization)                                                 
                                                                 
 activation_6 (Activation)   (None, 9, 9, 64)         

In [7]:
# Hand the network created above to the CES object through set_model();
# this happens before CES() is invoked in the run cell.
CAF.set_model(model)

## Run

In [8]:
# Run the strategy. CES() returns two values, bound here as `theta` and
# `rewards`; `theta` is what gets passed to show_last() afterwards.
# Long-running: in the recorded output each of the 10 iterations performs 25
# evaluations and takes about 9 minutes, i.e. roughly 90 minutes in total.
theta, rewards = CAF.CES()

Iteration:  1


100%|██████████████████████████████████████████████████████████████████████████████████| 25/25 [09:00<00:00, 21.62s/it]


reward: [1275. 1160. 1125. 1075.  980.  960.  960.  960.  955.  850.  800.  785.
  780.  750.  725.  720.  690.  685.  665.  590.  560.  140.   20.    0.
    0.]
best reward: 1275.0
Iteration:  2


100%|██████████████████████████████████████████████████████████████████████████████████| 25/25 [08:55<00:00, 21.43s/it]


reward: [1315.  960.  935.  870.  760.  720.  570.  550.  545.  520.  445.  420.
  285.  275.  235.  195.  190.  165.   95.   80.   50.   40.   20.   15.
    0.]
best reward: 1315.0
Iteration:  3


100%|██████████████████████████████████████████████████████████████████████████████████| 25/25 [08:54<00:00, 21.37s/it]


reward: [1300. 1185. 1170.  960.  960.  915.  890.  890.  805.  750.  745.  730.
  720.  720.  600.  590.  445.  435.  355.  195.   95.   90.    0.    0.
    0.]
best reward: 1300.0
Iteration:  4


100%|██████████████████████████████████████████████████████████████████████████████████| 25/25 [08:54<00:00, 21.36s/it]


reward: [1460. 1420. 1385. 1330. 1285. 1210. 1130. 1025.  770.  765.  720.  625.
  620.  620.  545.  530.  460.  370.  345.  305.  180.   25.    0.    0.
    0.]
best reward: 1460.0
Iteration:  5


100%|██████████████████████████████████████████████████████████████████████████████████| 25/25 [08:54<00:00, 21.37s/it]


reward: [1315. 1310. 1265. 1235. 1195. 1070. 1070. 1025. 1020.  930.  870.  835.
  835.  820.  815.  810.  760.  720.  715.  695.  515.  450.  430.  315.
  200.]
best reward: 1315.0
Iteration:  6


100%|██████████████████████████████████████████████████████████████████████████████████| 25/25 [08:55<00:00, 21.43s/it]


reward: [1405. 1335. 1315. 1315. 1315. 1280. 1255. 1100.  995.  960.  920.  855.
  795.  765.  755.  755.  735.  710.  710.  675.  495.  325.  320.  110.
   40.]
best reward: 1405.0
Iteration:  7


100%|██████████████████████████████████████████████████████████████████████████████████| 25/25 [08:56<00:00, 21.44s/it]


reward: [1715. 1330. 1225. 1200. 1115. 1085. 1070.  960.  930.  915.  905.  900.
  820.  735.  670.  660.  630.  540.  290.  150.  140.  125.   45.   25.
    0.]
best reward: 1715.0
Iteration:  8


100%|██████████████████████████████████████████████████████████████████████████████████| 25/25 [08:55<00:00, 21.40s/it]


reward: [1560. 1415. 1340. 1315. 1315. 1305. 1300. 1250. 1130. 1125.  915.  825.
  815.  755.  735.  730.  730.  690.  675.  615.  590.  525.  490.  460.
  300.]
best reward: 1560.0
Iteration:  9


100%|██████████████████████████████████████████████████████████████████████████████████| 25/25 [08:55<00:00, 21.40s/it]


reward: [1410. 1410. 1315. 1315. 1035. 1005.  985.  970.  960.  960.  865.  860.
  840.  805.  765.  660.  655.  600.  545.  430.  355.  340.  250.  230.
    0.]
best reward: 1410.0
Iteration:  10


100%|██████████████████████████████████████████████████████████████████████████████████| 25/25 [08:56<00:00, 21.46s/it]


reward: [1360. 1330. 1315. 1225. 1140. 1105. 1095.  970.  960.  960.  955.  895.
  820.  805.  790.  760.  750.  720.  610.  540.  470.  425.  230.   95.
   70.]
best reward: 1360.0


## Show Final Generation

In [None]:
# Pass the `theta` returned by CES() to show_last(), with the same step cap
# that was given to the constructor.
# NOTE(review): presumably this replays a game using the final parameters —
# confirm in AtariCES. The cell has no recorded output (it was never executed).
CAF.show_last(theta, max_step=max_step)

0.01: highest score in the last iteration = 0, highest score over all iterations = 0
0.1: highest score in the last iteration = 0, highest score over all iterations = 0