### Importing libraries

In [1]:
import gym
import atari_py

## Show the list of games

In [2]:
print(atari_py.list_games())

['adventure', 'air_raid', 'alien', 'amidar', 'assault', 'asterix', 'asteroids', 'atlantis', 'bank_heist', 'battle_zone', 'beam_rider', 'berzerk', 'bowling', 'boxing', 'breakout', 'carnival', 'centipede', 'chopper_command', 'crazy_climber', 'defender', 'demon_attack', 'donkey_kong', 'double_dunk', 'elevator_action', 'enduro', 'fishing_derby', 'freeway', 'frogger', 'frostbite', 'galaxian', 'gopher', 'gravitar', 'hero', 'ice_hockey', 'jamesbond', 'journey_escape', 'kaboom', 'kangaroo', 'keystone_kapers', 'king_kong', 'koolaid', 'krull', 'kung_fu_master', 'laser_gates', 'lost_luggage', 'montezuma_revenge', 'mr_do', 'ms_pacman', 'name_this_game', 'pacman', 'phoenix', 'pitfall', 'pong', 'pooyan', 'private_eye', 'qbert', 'riverraid', 'road_runner', 'robotank', 'seaquest', 'sir_lancelot', 'skiing', 'solaris', 'space_invaders', 'star_gunner', 'surround', 'tennis', 'tetris', 'time_pilot', 'trondead', 'tutankham', 'up_n_down', 'venture', 'video_pinball', 'wizard_of_wor', 'yars_revenge', 'zaxxon']

## Creating our environment as spaceInvaders

In [3]:
env = gym.make('SpaceInvaders-v0')

## Creating episodes

In [4]:
episodes = 10 #epochs

for episode in range(1,episodes):
    state = env.reset() #Every time we iterate we reset state to its original poistion and restart our agent at the begining
    done = False #wether our agent has completed the level
    score = 0
    
    while not done:
        env.render() #use to visualise what action our agent is doing
        state,reward,done,info = env.step(env.action_space.sample()) #the action that our agents will take within each frame
        #In each frame our agent will take action,now we are just going to take a random action
        #.sample() will do random action out of action_space(total 6 possible action)
        
        # state will be next state after taking this action
        
        score += reward #What current reward is within this while loop
    
    print("Episode: {}\nScore:{}".format(episode,score))
    
env.close() 

Episode: 1
Score:215.0
Episode: 2
Score:225.0
Episode: 3
Score:55.0
Episode: 4
Score:200.0
Episode: 5
Score:355.0
Episode: 6
Score:70.0
Episode: 7
Score:245.0
Episode: 8
Score:150.0
Episode: 9
Score:210.0


In [5]:
env.action_space
# it states we can take 6 possible action in this environment



Discrete(6)

### Building our neural Network

In [6]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Flatten,Conv2D
from tensorflow.keras.optimizers import Adam

In [7]:
def build_model(height,width,channels,actions): 
    #height width and channnels are pixel for our screen
    model = Sequential()
    model.add(Conv2D(32,(8,8),strides=(4,4),activation='relu',input_shape=(3,height,width,channels)))
    model.add(Conv2D(64,(4,4),strides=(2,2),activation='relu'))
    model.add(Flatten())
    model.add(Dense(512,activation='relu'))
    model.add(Dense(256,activation='relu'))
    model.add(Dense(actions,activation='linear'))
    return model
    
    

In [8]:
#Prinitng heigh,width and channel of our model
env.observation_space

#(210, 160, 3) represents a window our model will learn from

Box(0, 255, (210, 160, 3), uint8)

## Creating the variables

In [9]:
height,width,channels = env.observation_space.shape
actions = env.action_space.n
print(actions,"action")

6 action


## Delete the model from the memory

In [15]:
del model # if we get any error run these

In [16]:
model = build_model(height,width,channels,actions)
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 3, 51, 39, 32)     6176      
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 3, 24, 18, 64)     32832     
_________________________________________________________________
flatten_1 (Flatten)          (None, 82944)             0         
_________________________________________________________________
dense_4 (Dense)              (None, 512)               42467840  
_________________________________________________________________
dense_5 (Dense)              (None, 256)               131328    
_________________________________________________________________
dense_6 (Dense)              (None, 6)                 1542      
Total params: 42,639,718
Trainable params: 42,639,718
Non-trainable params: 0
__________________________________________

In [17]:
# improting keras-rl2 reinforcement learning agents
from rl.agents import DQNAgent #it is going to create our deep q-networls
from rl.memory import SequentialMemory #Dataset that our model will be learning from
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
#policy is the behaviour of our agent. Once we've created optimal policy we've created optimal agent

## Building reinfircement learning agent

In [18]:
def build_agent(model,actions):
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),attr='eps',value_max=1.,value_min=.1,value_test=.2,nb_steps=10000)
    
    """Linear Annealing Policy computes a current threshold value and
    transfers it to an inner policy which chooses the action. The threshold
    value is following a linear function decreasing over time."""
    
    
    memory = SequentialMemory(limit=2000,window_length=3)
    
    #creatint the agent
    dqn = DQNAgent(model=model,memory=memory,policy=policy,
                  enable_dueling_network=True,dueling_type='avg',
                  nb_actions = actions,nb_steps_warmup=1000)
    
    return dqn
    
    

## Create a variable that stores build agent function

In [19]:
dqn = build_agent(model,actions)

## Compiling 

In [20]:


dqn.compile(Adam(lr=0.0001))

## Training our agent

In [21]:
dqn.fit(env,nb_steps=4000,visualize=False,verbose=1)

Training for 4000 steps ...
Interval 1 (0 steps performed)






<tensorflow.python.keras.callbacks.History at 0x181f6edf808>

## Saving our model




In [22]:
dqn.save_weights('models/dqn2.h5f')

### Visulising our agent

In [23]:
dqn.load_weights('models/dqn2.h5f')
scores = dqn.test(env,nb_episodes=10,visualize = True)
print(np.mean(scores.history['episode_reward']))


Testing for 10 episodes ...
Episode 1: reward: 260.000, steps: 1137
Episode 2: reward: 120.000, steps: 657
Episode 3: reward: 30.000, steps: 341
Episode 4: reward: 230.000, steps: 1111
Episode 5: reward: 155.000, steps: 819
Episode 6: reward: 545.000, steps: 1193
Episode 7: reward: 125.000, steps: 701
Episode 8: reward: 110.000, steps: 1002
Episode 9: reward: 85.000, steps: 549
Episode 10: reward: 140.000, steps: 690
180.0
