# OpenAI Gym

## Imports

In [2]:
import numpy as np
import gym
from gym.core import ObservationWrapper
from gym.spaces import Box
from gym.spaces.box import Box
from gym.core import Wrapper
from model import DQN
from gym.wrappers import AtariPreprocessing, FrameStack

## Initiate Environment

In [3]:
# Build the Breakout environment and wrap it in a single expression:
# the raw NoFrameskip env goes through AtariPreprocessing (noop_max=12),
# and the result is wrapped in FrameStack with num_stack=4.
env = FrameStack(
    AtariPreprocessing(gym.make('BreakoutNoFrameskip-v4'), noop_max=12),
    num_stack=4,
)
# Start the first episode; as the cell's last expression, the initial
# observation object is displayed as the cell output.
env.reset()

<gym.wrappers.frame_stack.LazyFrames at 0x232d0a21c78>

## Environment Details

In [4]:
# Collect the basic facts about the wrapped environment.
state_dim = env.observation_space.shape            # shape of one observation
n_actions = env.action_space.n                     # size of the discrete action set
actions_meanings = env.env.get_action_meanings()   # queried on the env wrapped by FrameStack

# Emit the three report lines with a single print call.
print(
    f"Number of actions: {n_actions}",
    f"Action meanings: {actions_meanings}",
    f"State dimensions: {state_dim}",
    sep="\n",
)

Number of actions: 4
Action meanings: ['NOOP', 'FIRE', 'RIGHT', 'LEFT']
State dimensions: (4, 84, 84)


## DQN Model Architecture

In [5]:
# Instantiate the Q-network defined in the project's `model` module.
model = DQN()
# Keras' `summary()` prints the layer table itself and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
# NOTE(review): assumes DQN() yields a Keras model, as the 'Model: "sequential"'
# output suggests — confirm against model.py.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 20, 20, 32)        8224      
                                                                 
 batch_normalization (BatchN  (None, 20, 20, 32)       128       
 ormalization)                                                   
                                                                 
 activation (Activation)     (None, 20, 20, 32)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 9, 9, 64)          32832     
                                                                 
 batch_normalization_1 (Batc  (None, 9, 9, 64)         256       
 hNormalization)                                                 
                                                                 
 activation_1 (Activation)   (None, 9, 9, 64)          0

## Play Breakout

In [6]:
# Play one episode with a uniformly random policy, running the DQN forward
# pass on every observation to exercise the model's input pipeline.
env.reset()  # begin a fresh episode so this cell can be re-run on its own
episode_reward = 0

while True:
    action = env.action_space.sample()

    # Advance the environment exactly once per iteration. Stepping twice and
    # keeping only the second result would drop rewards and could step an
    # episode that has already finished.
    observation, reward, done, _ = env.step(action)
    episode_reward += reward

    # The stacked observation is (4, 84, 84); move the stack axis last and
    # add a leading batch axis before feeding the network.
    frames = np.asarray(observation).transpose(1, 2, 0)
    batch = np.expand_dims(frames, axis=0)
    # NOTE(review): the greedy action is computed but not used to drive the
    # environment; the rollout stays random. Assign it to `action` once the
    # network is trained.
    greedy_action = np.argmax(model(batch).numpy())

    if done:
        print('Reward: %s' % episode_reward)
        break

Reward: 0.0
