## Import libraries

### Install dependencies

In [0]:
# Install pyvirtualdisplay (version not pinned).
!pip install pyvirtualdisplay
# Install stable-baselines with its `mpi` extra, pinned to release 2.9.0.
!pip install stable-baselines[mpi]==2.9.0

### Mount Google Drive

In [0]:
import os
from google.colab import drive
# Mount Google Drive at /content/drive. The TensorBoard logs, saved models and
# recorded videos used by later cells all live under "/content/drive/My Drive".
drive.mount('/content/drive')

### Stable baselines and other libraries


In [0]:
%tensorflow_version 1.x

import gym
import numpy as np
from stable_baselines.deepq.policies import CnnPolicy,LnCnnPolicy
from stable_baselines.common.vec_env import VecVideoRecorder,SubprocVecEnv,DummyVecEnv
from stable_baselines import DQN

import base64
import IPython
import PIL.Image
import pyvirtualdisplay

# Video stuff 
from pathlib import Path
from IPython import display as ipythondisplay

import matplotlib.pyplot as plt

## Environment

In [0]:
# Build the Assault environment inside the vectorised wrapper used by the DQN cells.
# The factory creates the environment itself instead of closing over the name
# `env`, which is rebound to the wrapper on assignment; that way the factory
# returns a Gym environment no matter when it is called.
env = DummyVecEnv([lambda: gym.make('Assault-v0')])

### First Example

In [0]:
# Example 1: CnnPolicy, learning rate 2.5e-4, exploration_final_eps 0.05.
# Every "Example" cell rebinds `model`; run only the configuration you want to train.
model = DQN(
    CnnPolicy,
    env,
    learning_rate=2.5e-4,
    exploration_final_eps=0.05,
    tensorboard_log="/content/drive/My Drive/Tensorboard",
)

### Second Example

In [0]:
# Example 2: CnnPolicy, gamma 0.89, learning rate 2.5e-4; exploration settings are not overridden.
# Every "Example" cell rebinds `model`; run only the configuration you want to train.
model = DQN(
    CnnPolicy,
    env,
    gamma=0.89,
    learning_rate=2.5e-4,
    tensorboard_log="/content/drive/My Drive/Tensorboard",
)

### Third Example

In [0]:
# Example 3: LnCnnPolicy with prioritized replay (alpha 0.6), learning rate 5e-4, gamma 0.98.
# Every "Example" cell rebinds `model`; run only the configuration you want to train.
model = DQN(
    LnCnnPolicy,
    env,
    learning_rate=5e-4,
    buffer_size=100000,
    exploration_final_eps=0.01,
    train_freq=4,
    learning_starts=100,
    target_network_update_freq=1000,
    gamma=0.98,
    prioritized_replay=True,
    prioritized_replay_alpha=0.6,
    tensorboard_log="/content/drive/My Drive/Tensorboard",
)

### Fourth Example

In [0]:
# Example 4: LnCnnPolicy with prioritized replay (alpha 0.6), learning rate 2.5e-4, gamma 0.99.
# Every "Example" cell rebinds `model`; run only the configuration you want to train.
model = DQN(
    LnCnnPolicy,
    env,
    learning_rate=2.5e-4,
    buffer_size=100000,
    exploration_final_eps=0.01,
    train_freq=4,
    learning_starts=100,
    target_network_update_freq=1000,
    gamma=0.99,
    prioritized_replay=True,
    prioritized_replay_alpha=0.6,
    tensorboard_log="/content/drive/My Drive/Tensorboard",
)

### Fifth Example

In [0]:
# Example 5: LnCnnPolicy with prioritized replay (alpha 0.4), learning rate 2.5e-4, gamma 0.9.
# Every "Example" cell rebinds `model`; run only the configuration you want to train.
model = DQN(
    LnCnnPolicy,
    env,
    learning_rate=2.5e-4,
    buffer_size=100000,
    exploration_final_eps=0.01,
    train_freq=4,
    learning_starts=100,
    target_network_update_freq=1000,
    gamma=0.9,
    prioritized_replay=True,
    prioritized_replay_alpha=0.4,
    tensorboard_log="/content/drive/My Drive/Tensorboard",
)

## Train

In [0]:
# Train whichever DQN configuration is currently bound to `model` for 50,000 timesteps.
model.learn(total_timesteps=50_000)

## TensorBoard 

In [0]:
# Load the TensorBoard notebook extension and point it at the same directory
# that the DQN constructors receive as `tensorboard_log`.
%load_ext tensorboard
%tensorboard --logdir "/content/drive/My Drive/Tensorboard"

## Save Model

In [0]:
models_dir = '/content/drive/My Drive/RL/image'

def savemodel(model,problem):
    """Save ``model`` under ``models_dir`` as ``<problem>.h5``.

    :param model: (RL model) object exposing ``save(path)``
    :param problem: (str) base name of the file, without extension
    """
    # Create the target folder on first use so that a missing directory does
    # not make the save fail after training has already finished.
    os.makedirs(models_dir, exist_ok=True)
    filename = os.path.join(models_dir, '%s.h5' %problem)
    model.save(filename)
    print("\nModel saved successfully on file %s\n" %filename)

savemodel(model,'model4')


## Load Model

In [0]:
models_dir = '/content/drive/My Drive/RL/image'

def loadmodel(problem):
    """Load the DQN model stored as ``<problem>.h5`` under ``models_dir``.

    :param problem: (str) base name of the file, without extension
    :return: (DQN or None) the loaded model, or None when the file cannot be found or read
    """
    filename = os.path.join(models_dir, '%s.h5' %problem)
    # Check for the file up front: the library does not necessarily report a
    # missing path with OSError (Stable Baselines 2.x raises ValueError after
    # also trying "<path>.zip"), which the handler below would not catch.
    if not (os.path.exists(filename) or os.path.exists(filename + '.zip')):
        print("\nModel file %s not found!!!\n" %filename)
        return None
    try:
        modelRL = DQN.load(filename)
        print("\nModel loaded successfully from file %s\n" %filename)
    except OSError:
        # Kept from the original: any OS-level failure while reading is
        # reported the same way and yields None.
        print("\nModel file %s not found!!!\n" %filename)
        modelRL = None
    return modelRL

model = loadmodel('model3')

## Test

In [0]:
avg_reward=[]
episodes_rewards1=[]
episodes_rewards2=[]
episodes_rewards3=[]

# Evaluate the current model over three runs of 100 episodes each. Every run
# fills its own list so that the averaging cell can combine episode j across runs.
for run_rewards in (episodes_rewards1, episodes_rewards2, episodes_rewards3):
    for episodes in range(1,101):
        obs = env.reset()
        reward_sum = 0.0
        while True:
            # predict() returns (action, state); only the action is passed to the env,
            # matching how record_video() drives its environment.
            action, _states = model.predict(obs)
            obs, reward, done, _info = env.step(action)
            # `env` is a DummyVecEnv holding a single environment, so `reward` and
            # `done` are length-1 arrays; unwrap element 0 so the score is a plain float.
            reward_sum += float(reward[0])

            if done[0]:
                run_rewards.append(reward_sum)
                print("Episodes: {}".format(episodes))
                print("Total score: {}".format(reward_sum))
                break

### Average of episodes 

In [0]:
# Average episode j over the three evaluation runs. The list is rebuilt from
# scratch so that re-running this cell does not append a second copy of the
# averages, and zip() stops at the shortest run instead of assuming that every
# run holds exactly 100 episodes.
avg_reward = [
    np.mean(scores)
    for scores in zip(episodes_rewards1, episodes_rewards2, episodes_rewards3)
]
print("Avg score: {}".format(avg_reward))

### Plot Results

In [0]:
# Plot the values collected in `avg_reward`, one point per episode index.
fig, ax = plt.subplots()
ax.plot(avg_reward)
ax.set_ylabel("average rewards")
ax.set_xlabel("number of episodes")
plt.show()

## Record Video and Show Video Functions

In [0]:
# Record video
def record_video(env_id, model, video_length=1500, prefix='', video_folder='/content/drive/My Drive/videos/image'):
  """
  Roll out ``model`` in a newly created environment and record the rollout as a video.

  :param env_id: (str) Gym id of the environment created for the recording
  :param model: (RL model) agent whose ``predict`` method chooses the actions
  :param video_length: (int) number of steps to run and record
  :param prefix: (str) name prefix passed to the video recorder
  :param video_folder: (str) folder the video recorder writes into
  """
  eval_env = DummyVecEnv([lambda: gym.make(env_id)])
  # Wrap the evaluation env created above (not the notebook-level training `env`),
  # so that `env_id` is honoured and closing the recorder does not close the
  # training environment. Recording starts at step 0 and lasts `video_length` steps.
  eval_env = VecVideoRecorder(eval_env, video_folder=video_folder,
                              record_video_trigger=lambda step: step == 0, video_length=video_length,
                              name_prefix=prefix)

  try:
    obs = eval_env.reset()
    for _ in range(video_length):
      action, _states = model.predict(obs)
      obs, _rewards, _dones, _infos = eval_env.step(action)
  finally:
    # Close the video recorder (and the env it wraps) even if the rollout fails.
    eval_env.close()



# Display video
def show_videos(video_path='', prefix=''):
  """
  Display every ``<prefix>*.mp4`` file found in ``video_path`` in the notebook output.

  :param video_path: (str) folder searched for recordings
  :param prefix: (str) beginning of the file names to include
  """
  video_tag = '''<video alt="{}" autoplay 
                    loop controls style="height: 400px;">
                    <source src="data:video/mp4;base64,{}" type="video/mp4" />
                </video>'''
  pattern = "{}*.mp4".format(prefix)
  # Each clip is embedded as a base64 data URI inside its own <video> tag.
  tags = [
      video_tag.format(clip, base64.b64encode(clip.read_bytes()).decode('ascii'))
      for clip in Path(video_path).glob(pattern)
  ]
  ipythondisplay.display(ipythondisplay.HTML(data="<br>".join(tags)))

### Show Video

In [0]:
# Record a 1500-step rollout of the current model, then display the clips
# whose file names start with the same prefix.
record_video(env_id='Assault-v0', model=model, video_length=1500, prefix='dqn-assault')
show_videos(video_path='/content/drive/My Drive/videos/image', prefix='dqn-assault')