# Building an agent that will randomly play the Space Invaders game

In [None]:
import gym
import cv2 as cv

In [None]:
# Downloading Atari Env
# Only the extraction step below is active. The commented-out lines are the
# remaining setup steps (download the archive, install unrar, import the ROMs
# into atari_py); uncomment the ones that still need to be run.
# import urllib.request
# urllib.request.urlretrieve('http://www.atarimania.com/roms/Roms.rar','Roms.rar')
# !pip install unrar
# Extract Roms.rar; the relative path means it is looked up in the current working directory.
!unrar x Roms.rar
# !mkdir rars
# !mv HC\ ROMS.zip   rars
# !mv ROMS.zip  rars
# !python -m atari_py.import_roms rars


### The agent-environment loop

In [None]:
# Create the Space Invaders environment used by the cells below.
env = gym.make('SpaceInvaders-v0')

In [None]:
# Show the initial env state
# The environment returns the frame in RGB channel order, while OpenCV's imshow
# interprets 3-channel images as BGR; convert first so that red and blue are
# not swapped in the displayed image.
cv.imshow('Initial state', cv.cvtColor(env.reset(), cv.COLOR_RGB2BGR))
cv.waitKey(0)  # keep the window open until a key is pressed
cv.destroyAllWindows()

# Summarise the interface the agent has to work with.
print(f"Action space in the environment: {env.action_space}")
print(f"The state representation that the model will understand: {env.observation_space.shape}")

In [None]:
num_episodes = 100

# Play `num_episodes` episodes with uniformly random actions and report the
# score of each one. The try/finally guarantees the environment is closed even
# if the loop is interrupted or a step raises.
try:
    for episode in range(num_episodes):
        env.reset()  # start a fresh episode; the initial observation is not needed here
        done = False  # When true => the agent lost (the end of an episode)
        score = 0
        while not done:
            env.render()  # To show how the agent interacts with the env
            action = env.action_space.sample()  # take a random action
            # returns the observations ensued from the agent-env interaction
            state, reward, done, info = env.step(action)
            score += reward
        print(f"Episode: {episode}, Score: {score}")
finally:
    env.close()

# Building an agent that utilizes a NN to take better actions

In [None]:
import numpy as np
import gym
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras.optimizers import Adam, SGD
# Create the environment that the network and the agent below are built for.
env = gym.make('SpaceInvaders-v0')

In [None]:
# Building the NN network

def build_NN_model(actions, batch_size, hight, width, channels):
    """Build the (uncompiled) convolutional network that scores each action.

    The network is two strided convolutions, a flatten, two ReLU dense layers
    and a linear output layer with one unit per action.

    Args:
        actions: Number of units in the final linear layer.
        batch_size: Leading dimension of the declared input shape.
            NOTE(review): this notebook passes the same value as
            ``window_length`` to ``SequentialMemory``, so it appears to be the
            number of stacked frames rather than a training batch size --
            confirm.
        hight: Frame height (spelling kept so existing callers keep working).
        width: Frame width.
        channels: Number of colour channels per frame.

    Returns:
        The assembled ``Sequential`` model.
    """
    model = Sequential()
    # First convolution also declares the input shape of the whole network.
    model.add(
        Conv2D(
            filters=32,
            kernel_size=(8, 8),
            strides=(4, 4),
            activation="relu",
            input_shape=(batch_size, hight, width, channels),
        )
    )
    model.add(Conv2D(filters=64, kernel_size=(4, 4), strides=(2, 2), activation="relu"))
    model.add(Flatten())
    # Fully connected head: 512 -> 256 -> one linear output per action.
    for hidden_units in (512, 256):
        model.add(Dense(units=hidden_units, activation="relu"))
    model.add(Dense(units=actions, activation="linear"))
    return model
# Read the network dimensions from the environment and build the model.
batch_size = 2
actions = env.action_space.n
hight, width, channels = env.observation_space.shape
NN_model = build_NN_model(
    actions=actions,
    batch_size=batch_size,
    hight=hight,
    width=width,
    channels=channels,
)

In [None]:
# building the deepQnetwork (agent)
from rl.agents import DQNAgent # The DQN algorithm (agent)
from rl.memory import SequentialMemory # The Tabular-like structure the agent will use to learn the Q-values
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy  
# The policies the RL agent will follow to learn the Q-values: as it's off-policy, the agent will use one greedy
# policy to always choose the greedy action (highest Q-value) and another policy that will break the greedy action
# selection at a rate of $\epsilon$

def build_agent(model, actions, batch_size, memory_limit=100, anneal_steps=100, warmup_steps=1000):
    """Assemble a dueling DQN agent around ``model``.

    Args:
        model: The Keras network handed to ``DQNAgent``.
        actions: Number of discrete actions (passed as ``nb_actions``).
        batch_size: Passed to ``SequentialMemory`` as ``window_length``, so it
            must match the leading dimension of the model's input shape.
        memory_limit: ``limit`` of the ``SequentialMemory``. Defaults to the
            previously hard-coded 100.
        anneal_steps: ``nb_steps`` of the ``LinearAnnealedPolicy`` that drives
            ``eps`` (``value_max=1.0``, ``value_min=0.1``). Defaults to the
            previously hard-coded 100.
        warmup_steps: ``nb_steps_warmup`` of the agent. Defaults to the
            previously hard-coded 1000.

    Returns:
        The uncompiled ``DQNAgent``; call ``compile`` on it before ``fit``.
    """
    # Epsilon-greedy exploration with an annealed `eps`; `value_test` is the
    # epsilon setting supplied for test runs.
    policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.2,
        nb_steps=anneal_steps,
    )
    memory_s = SequentialMemory(limit=memory_limit, window_length=batch_size)
    dqn = DQNAgent(
        model=model,
        memory=memory_s,
        policy=policy,
        nb_actions=actions,
        enable_dueling_network=True,
        dueling_type='avg',
        nb_steps_warmup=warmup_steps,
    )
    return dqn


In [None]:
# Note that you might get an error related to memory, in which case the NN_model has to be deleted
# from memory and recreated. Deleting alone would leave `NN_model` undefined for the cells below,
# so the model is rebuilt right after the deletion.
del NN_model
NN_model = build_NN_model(actions, batch_size, hight, width, channels)

In [None]:
# Wrap the network in a DQN agent.
DQN = build_agent(model=NN_model, actions=actions, batch_size=batch_size)

In [None]:
# Compile the agent with the Adam optimizer. `learning_rate` is the supported
# keyword in tf.keras; the `lr` alias is deprecated.
DQN.compile(Adam(learning_rate=0.0001))

In [None]:
# Train the agent on the environment for 3000 steps, without visualisation.
DQN.fit(
    env,
    nb_steps=3000,
    visualize=False,
    verbose=1,
)

## Testing the trained agent

In [None]:
# Run the trained agent for 10 test episodes with visualisation enabled and
# keep the returned history for the summary below.
scores = DQN.test(
    env,
    nb_episodes=10,
    visualize=True,
)

In [None]:
# Summarise the test run: how many episodes were played and their mean reward.
episode_rewards = scores.history['episode_reward']
print(f"The average reward for {len(episode_rewards)} episodes is: {np.mean(episode_rewards)}")

## Saving and loading the model

In [None]:
# DQN.save_weights('models/dqn.hf5')
# DQN.load_weights('models/dqn.hf5')