# OpenAI Gym

## Imports

In [1]:
import gym
import numpy as np
from model import DQN
from gym.wrappers import AtariPreprocessing, FrameStack
import matplotlib.pyplot as plt
import tensorflow as tf

  f"Custom namespace `{spec.namespace}` is being overridden "
  import imp


## Parameters

## Initialize Environment

In [2]:
# Create the raw Breakout environment.
env = gym.make('BreakoutNoFrameskip-v4')
# Wrap it with gym's Atari preprocessing (noop_max=30).
env = AtariPreprocessing(env, noop_max=30)
# Stack 4 frames per observation (num_stack=4).
env = FrameStack(env, num_stack=4)
# Reset once; as the last expression of the cell, the returned observation is displayed.
env.reset()

<gym.wrappers.frame_stack.LazyFrames at 0x1e8d7880228>

## Environment Details

In [3]:
# Number of available actions, read from the action space.
n_actions = env.action_space.n
# Action names, queried through the environment wrapped by the outermost wrapper.
actions_meanings = env.env.get_action_meanings()
# Shape of a single observation as reported by the observation space.
state_dim = env.observation_space.shape
print(f"Number of actions: {n_actions}")
print(f"Action meanings: {actions_meanings}")
print(f"State dimensions: {state_dim}")

Number of actions: 4
Action meanings: ['NOOP', 'FIRE', 'RIGHT', 'LEFT']
State dimensions: (4, 84, 84)


## DQN Model Architecture

In [4]:
# Build the network and show its layer-by-layer summary.
model = DQN()
# summary() prints the table itself and returns None, so it is called directly;
# wrapping it in print() would append a stray "None" line to the output.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 20, 20, 32)        8224      
                                                                 
 batch_normalization (BatchN  (None, 20, 20, 32)       128       
 ormalization)                                                   
                                                                 
 activation (Activation)     (None, 20, 20, 32)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 9, 9, 64)          32832     
                                                                 
 batch_normalization_1 (Batc  (None, 9, 9, 64)         256       
 hNormalization)                                                 
                                                                 
 activation_1 (Activation)   (None, 9, 9, 64)          0

## Play Breakout

In [5]:
def plot_frames(frames):
    """Plot each frame of a stack side by side and save the figure to disk.

    Parameters
    ----------
    frames : array indexable as ``frames[:, :, i]``
        Image stack whose third axis enumerates the frames. Note that
        ``get_frames`` returns a 4-D batch with a leading axis of size 1,
        so pass ``get_frames(obs)[0]`` rather than the batch itself.

    Side effects
    ------------
    Writes the figure to ``processed_input.png`` in the working directory.
    """
    n_frames = frames.shape[2]
    # squeeze=False always returns a 2-D array of Axes; without it a single
    # frame yields a bare Axes object that has no `.flat` attribute.
    fig, axs = plt.subplots(1, n_frames, squeeze=False)
    for i, ax in enumerate(axs.flat):
        ax.imshow(frames[:, :, i], cmap="gray")
        ax.axis("off")
        ax.set_title(f"frame {i+1}")
    # Save through the figure handle instead of relying on pyplot's current figure.
    fig.savefig("processed_input.png")

In [6]:
def get_frames(observation):
    """Turn a 3-D frame stack into a one-sample batch with the first axis moved last.

    Parameters
    ----------
    observation : object exposing ``__array__()``
        Must materialise to a 3-D array; axis 0 is moved to the end.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, d1, d2, d0)`` for an input of shape ``(d0, d1, d2)``.
    """
    stacked = observation.__array__()
    # Move axis 0 to the end, then prepend a batch axis of length one.
    reordered = np.transpose(stacked, (1, 2, 0))
    return reordered[np.newaxis, ...]

In [7]:
def episode(model, max_step=10000):
    """Run a greedy rollout of `model` on Breakout and return the summed reward.

    A fresh environment is created for the rollout. At every step the action
    is the argmax of ``model(frames).numpy()``. When the environment reports
    ``done`` it is reset and the rollout continues, so the returned value is
    the total reward collected over ``max_step`` environment steps, which may
    span several games.

    Parameters
    ----------
    model : callable
        Called with the batched frames from ``get_frames``; its result must
        expose ``.numpy()``.
    max_step : int, optional
        Number of environment steps to play (default 10000).

    Returns
    -------
    Sum of the rewards returned by ``env.step`` over the rollout.
    """
    env = gym.make('BreakoutNoFrameskip-v4')
    env = AtariPreprocessing(env, noop_max=30)
    env = FrameStack(env, num_stack=4)

    episode_reward = 0
    step = 0

    # The environment is created per call, so it must also be released per
    # call; otherwise every rollout leaks an emulator instance.
    try:
        frames = get_frames(env.reset())

        while step < max_step:
            step += 1

            action = np.argmax(model(frames).numpy())
            observation, reward, done, _ = env.step(action)
            frames = get_frames(observation)

            episode_reward += reward

            if done:
                # Start a new game but keep accumulating reward.
                frames = get_frames(env.reset())
    finally:
        env.close()

    return episode_reward

    

In [8]:
def get_weights(parents):
    """Return normalised log-rank recombination weights for `parents` ranks.

    The weight for rank ``i`` (1 = best) is::

        w_i = (log(parents + 0.5) - log(i)) / sum_j (log(parents + 0.5) - log(j))

    Using ``parents + 0.5`` keeps every weight strictly positive. With
    ``parents - 0.5`` the term for ``i == parents`` is negative, which pushes
    the update away from the last selected parent and distorts the
    normalisation.

    Parameters
    ----------
    parents : int
        Number of selected offspring to weight.

    Returns
    -------
    numpy.ndarray
        Array of length ``parents``, non-increasing and summing to 1.
    """
    ranks = np.arange(1, parents + 1)
    W = np.log(parents + 0.5) - np.log(ranks)
    W /= np.sum(W)

    return W

In [9]:
def get_start_parameters(model):
    """Flatten all of the model's weight arrays into a single 1-D vector.

    Parameters
    ----------
    model : object exposing ``get_weights()``
        ``get_weights()`` must return a sequence of arrays.

    Returns
    -------
    numpy.ndarray
        A new 1-D array holding every weight array's entries, in the order
        ``get_weights()`` returned them.
    """
    flattened = [np.ravel(weights) for weights in model.get_weights()]
    return np.concatenate(flattened)


In [10]:
def get_model_weights(theta, mut_stepsize, e):
    """Build a new DQN whose weights are ``theta + mut_stepsize * e``.

    Parameters
    ----------
    theta : 1-D array
        Flat parameter vector, laid out in the order of ``DQN().get_weights()``.
    mut_stepsize : float
        Scale applied to the perturbation ``e``.
    e : 1-D array
        Perturbation with the same length as ``theta``.

    Returns
    -------
    A freshly constructed ``DQN`` with the perturbed weights set on it.
    """
    model = DQN()
    templates = model.get_weights()
    candidate = theta + mut_stepsize * e

    # Walk the flat vector, cutting out one slice per weight array and giving
    # it that array's shape.
    reshaped = []
    offset = 0
    for template in templates:
        stop = offset + template.size
        reshaped.append(candidate[offset:stop].reshape(template.shape))
        offset = stop

    model.set_weights(reshaped)

    return model

In [11]:
def CES(model, mut_stepsize, parents, n_offspring, iterations):
    """Optimise the model's parameters with a canonical evolution strategy.

    Each iteration samples ``n_offspring`` standard-normal perturbations,
    evaluates ``theta + mut_stepsize * e_i`` with ``episode``, ranks the
    offspring by reward (best first), and moves ``theta`` along the weighted
    sum of the ``parents`` best perturbations.

    Parameters
    ----------
    model : object exposing ``get_weights()``
        Supplies the starting parameters; it is only read, never updated.
    mut_stepsize : float
        Mutation step size (sigma) scaling both the evaluated perturbation
        and the parameter update.
    parents : int
        Number of top-ranked offspring recombined per iteration.
    n_offspring : int
        Number of perturbations evaluated per iteration.
    iterations : int
        Number of ES iterations.

    Returns
    -------
    numpy.ndarray
        Length-``iterations`` array with the best offspring reward of each
        iteration. The evolved parameter vector is local to this function and
        is not returned.

    Raises
    ------
    ValueError
        If ``parents`` exceeds ``n_offspring``.
    """
    # Fail before any rollout is run rather than after a full generation.
    if parents > n_offspring:
        raise ValueError(
            f"parents ({parents}) cannot exceed n_offspring ({n_offspring})"
        )

    theta = get_start_parameters(model)
    W = get_weights(parents)
    best_r = np.zeros((iterations))

    for t in range(iterations):
        print('Iteration: ',t+1)
        e = np.zeros((n_offspring, theta.shape[0]))
        r = np.zeros((n_offspring))

        for i in range(n_offspring):
            # Unit-variance noise: the step size is applied exactly once, when
            # the perturbation is added to theta. (np.random.normal's second
            # argument is a standard deviation, not a variance.)
            e[i] = np.random.normal(0, 1, size=theta.shape)
            new_model = get_model_weights(theta, mut_stepsize, e[i])
            r[i] = episode(new_model)

        # Rank offspring from highest to lowest reward so that the first
        # (largest) weight is applied to the best perturbation.
        ranking = np.argsort(-r)
        best_r[t] = np.max(r)
        best_es = e[ranking[:parents]]

        # Weighted recombination of the selected perturbations.
        theta += mut_stepsize * np.dot(W, best_es)

    return best_r

In [12]:
# Evolve a freshly initialised network and keep the per-iteration best rewards.
model = DQN()
rewards = CES(
    model,
    mut_stepsize=0.1,
    parents=10,
    n_offspring=20,
    iterations=10,
)

Iteration:  1


In [None]:
# Show the best offspring reward recorded for each CES iteration.
print(rewards)