# Building an agent that will randomly play the Space Invaders game

In [1]:
import gym
import cv2 as cv

In [8]:
# Downloading Atari Env
# The commented-out lines below are the one-time setup steps (download the ROM
# archive, install unrar, move the zips into `rars`, import them with atari_py);
# only the extraction step is currently active.
# import urllib.request
# urllib.request.urlretrieve('http://www.atarimania.com/roms/Roms.rar','Roms.rar')
# !pip install unrar
# Extract the archive. This shells out, so an `unrar` executable must be on PATH;
# otherwise bash reports "unrar: command not found".
!unrar x Roms.rar
# !mkdir rars
# !mv HC\ ROMS.zip   rars
# !mv ROMS.zip  rars
# !python -m atari_py.import_roms rars


/bin/bash: unrar: command not found


### The agent-environment loop

In [2]:
# Instantiate the Space Invaders environment used by the random agent.
ENV_ID = 'SpaceInvaders-v0'
env = gym.make(ENV_ID)

In [None]:
# Show the initial env state
initial_frame = env.reset()
# Gym's Atari observation is an RGB array, whereas cv.imshow interprets 3-channel
# images as BGR; convert first so the window shows the game's real colours.
cv.imshow('Initial state', cv.cvtColor(initial_frame, cv.COLOR_RGB2BGR))
cv.waitKey(0)  # block until a key is pressed in the image window
cv.destroyAllWindows()

print(f"Action space in the environment: {env.action_space}")
print(f"The state representation that the model will understand: {env.observation_space.shape}")

In [4]:
num_episodes = 100

try:
    for episode in range(num_episodes):
        env.reset()  # start a fresh episode; the initial observation is not needed by a random agent
        done = False  # becomes True at the end of an episode (the agent lost)
        score = 0
        while not done:
            env.render()  # show how the agent interacts with the env
            action = env.action_space.sample()  # take a random action
            # step() returns the observations ensuing from the agent-env interaction;
            # only the reward and the done flag are used here.
            state, reward, done, info = env.step(action)
            score += reward
        print(f"Episode: {episode}, Score: {score}")
finally:
    # Always close the environment, even if the loop raises or is interrupted.
    env.close()

Episode: 0, Score: 115.0
Episode: 1, Score: 235.0
Episode: 2, Score: 405.0
Episode: 3, Score: 180.0
Episode: 4, Score: 30.0
Episode: 5, Score: 145.0
Episode: 6, Score: 225.0
Episode: 7, Score: 110.0
Episode: 8, Score: 30.0
Episode: 9, Score: 105.0
Episode: 10, Score: 75.0
Episode: 11, Score: 365.0
Episode: 12, Score: 105.0
Episode: 13, Score: 390.0
Episode: 14, Score: 15.0
Episode: 15, Score: 210.0
Episode: 16, Score: 515.0
Episode: 17, Score: 210.0
Episode: 18, Score: 670.0
Episode: 19, Score: 50.0
Episode: 20, Score: 75.0
Episode: 21, Score: 155.0
Episode: 22, Score: 135.0
Episode: 23, Score: 155.0
Episode: 24, Score: 180.0
Episode: 25, Score: 155.0
Episode: 26, Score: 180.0
Episode: 27, Score: 95.0
Episode: 28, Score: 430.0
Episode: 29, Score: 135.0
Episode: 30, Score: 75.0
Episode: 31, Score: 105.0
Episode: 32, Score: 80.0
Episode: 33, Score: 290.0
Episode: 34, Score: 165.0
Episode: 35, Score: 75.0
Episode: 36, Score: 110.0
Episode: 37, Score: 125.0
Episode: 38, Score: 165.0
Episod

# Building an agent that uses a neural network (NN) to take better actions

In [2]:
import numpy as np
import gym
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras.optimizers import Adam, SGD
# Create the Space Invaders environment that the DQN agent will train on.
ENV_ID = 'SpaceInvaders-v0'
env = gym.make(ENV_ID)

2021-07-15 16:32:47.333599: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-07-15 16:32:47.333677: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [15]:
# Building the NN network

def build_NN_model(actions, batch_size, hight, width, channels):
    """Assemble the convolutional network that the DQN agent uses as its model.

    Args:
        actions: Number of units in the final linear layer (one per action).
        batch_size: Leading dimension of the per-sample input shape. The notebook
            passes the same value as the replay memory's ``window_length``.
        hight: Frame height (second dimension of the input shape).
        width: Frame width (third dimension of the input shape).
        channels: Number of channels per frame (last dimension of the input shape).

    Returns:
        An uncompiled ``Sequential`` model: two ReLU convolutions, a flatten,
        two ReLU dense layers (512 and 256 units) and a linear output layer.
    """
    input_dims = (batch_size, hight, width, channels)

    network = Sequential()
    network.add(Conv2D(filters=32, kernel_size=(8, 8), strides=(4, 4), activation="relu",
                       input_shape=input_dims))
    network.add(Conv2D(filters=64, kernel_size=(4, 4), strides=(2, 2), activation="relu"))
    network.add(Flatten())
    for hidden_units in (512, 256):
        network.add(Dense(units=hidden_units, activation="relu"))
    network.add(Dense(units=actions, activation="linear"))
    return network
# Derive the network dimensions from the environment, then build the model.
obs_shape = env.observation_space.shape
hight, width, channels = obs_shape
actions = env.action_space.n
batch_size = 2  # also handed to the replay memory as its window_length
NN_model = build_NN_model(
    actions=actions,
    batch_size=batch_size,
    hight=hight,
    width=width,
    channels=channels,
)

In [13]:
# building the deepQnetwork (agent)
from rl.agents import DQNAgent # The DQN algorithm (agent)
from rl.memory import SequentialMemory # The Tabular-like structure the agent will use to learn the Q-values
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy  
# The policies the RL agent follows to learn the Q-values. Because DQN is off-policy, the agent uses one greedy
# policy that always chooses the greedy action (highest Q-value) and another policy that breaks the greedy action
# selection at a rate of $\epsilon$.

def build_agent(model, actions, batch_size, *, memory_limit=100, anneal_steps=100, warmup_steps=1000):
    """Build an uncompiled dueling DQN agent around ``model``.

    Args:
        model: Keras model handed to ``DQNAgent`` as ``model``.
        actions: Number of discrete actions (passed as ``nb_actions``).
        batch_size: Used as the ``window_length`` of the replay memory, despite
            its name.
        memory_limit: Value passed to ``SequentialMemory`` as ``limit``.
        anneal_steps: ``nb_steps`` over which the annealed policy moves ``eps``
            from 1.0 down to 0.1.
        warmup_steps: Value passed to ``DQNAgent`` as ``nb_steps_warmup``.

    Returns:
        The configured ``DQNAgent``. The notebook calls ``compile`` on it
        before training.
    """
    # Epsilon-greedy exploration whose eps is annealed linearly during training;
    # value_test is the eps used when the agent is evaluated.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.2,
                                  nb_steps=anneal_steps)
    memory_s = SequentialMemory(limit=memory_limit, window_length=batch_size)
    dqn = DQNAgent(model=model, memory=memory_s, policy=policy, nb_actions=actions, enable_dueling_network=True,
                   dueling_type='avg', nb_steps_warmup=warmup_steps)
    return dqn


In [14]:
# Note that you might get an error related to memory, in which case you need to delete NN_model from memory
# (using del NN_model) and recreate it. The model is rebuilt right here so that the next cell, which passes
# NN_model to build_agent, still finds it when the notebook is run top to bottom.
del NN_model
NN_model = build_NN_model(actions, batch_size, hight, width, channels)

In [16]:
# Wrap the network in a DQN agent.
DQN = build_agent(model=NN_model, actions=actions, batch_size=batch_size)

In [17]:
# `lr` is a deprecated alias that newer Keras optimizers reject; `learning_rate` is the supported name.
DQN.compile(Adam(learning_rate=0.0001))

In [20]:
# Training the model on the env
TRAINING_STEPS = 3000
DQN.fit(env, nb_steps=TRAINING_STEPS, visualize=False, verbose=1)

Training for 3000 steps ...
Interval 1 (0 steps performed)
 1286/10000 [==>...........................] - ETA: 34:57 - reward: 0.2022done, took 310.004 seconds


<tensorflow.python.keras.callbacks.History at 0x7f85b057c1c0>

## Testing the trained agent

In [21]:
# Evaluate the trained agent in the env for a fixed number of episodes
NUM_TEST_EPISODES = 10
scores = DQN.test(env, nb_episodes=NUM_TEST_EPISODES, visualize=True)

Testing for 10 episodes ...
Episode 1: reward: 25.000, steps: 361
Episode 2: reward: 135.000, steps: 656
Episode 3: reward: 15.000, steps: 403
Episode 4: reward: 315.000, steps: 911
Episode 5: reward: 75.000, steps: 956
Episode 6: reward: 135.000, steps: 718
Episode 7: reward: 25.000, steps: 387
Episode 8: reward: 260.000, steps: 1251
Episode 9: reward: 150.000, steps: 670
Episode 10: reward: 225.000, steps: 906


In [37]:
# Summarise the evaluation run: how many episodes were played and their mean reward.
episode_rewards = scores.history['episode_reward']
print(f"The average reward for {len(episode_rewards)} episodes is: {np.mean(episode_rewards)}")

The avergae reward for 10 episode is: 136.0


## Saving and loading the model

In [None]:
# DQN.save_weights('models/dqn.hf5')
# DQN.load_weights('models/dqn.hf5')