In [None]:
import os
import datetime
from google.colab import drive
# Mount Google Drive inside the Colab VM so the assignment files are reachable.
drive.mount('/content/gdrive')

# Work from the assignment folder so local modules (dqn_agent, utils) and the
# relative 'models/' and 'plots/' paths used later resolve against it.
os.chdir('/content/gdrive/My Drive/CS6700/PA2')
# List the folder as a quick check that the mount and chdir succeeded.
!ls

Mounted at /content/gdrive
acrobot-video	dqn_agent.py	   models     __pycache__	utils.py
cartpole-video	dqn.py		   PA2.ipynb  replay_memory.py	wandb
car-video	fresh-sweep-1.mat  plots      Untitled0.ipynb


In [None]:
# Show which GPU (if any) is attached to this runtime.
!nvidia-smi

In [None]:
# Install Weights & Biases (used below for run logging and sweeps) and gym.
# NOTE(review): gym is presumably pinned to 0.19.0 because this notebook relies
# on gym.wrappers.Monitor and the 4-tuple env.step() return - confirm before upgrading.
!pip install wandb
!pip install gym==0.19.0

In [None]:
# Successfully working: system packages for headless rendering on Colab.
# xvfb backs the pyvirtualdisplay Display started in the next cell;
# python-opengl and ffmpeg are presumably needed for gym rendering and
# video encoding - TODO confirm.
!apt install python-opengl
!apt install ffmpeg
!apt install xvfb
!pip3 install pyvirtualdisplay

In [None]:
from pyvirtualdisplay import Display

# Create an off-screen (visible=0) 1400x900 display and start it; the bare
# start() call is kept as the last expression of the cell.
virtual_display = Display(
    size=(1400, 900),
    visible=0,
)
virtual_display.start()

<pyvirtualdisplay.display.Display at 0x7f9140865190>

In [None]:
from scipy.io import loadmat,savemat
import numpy as np
import gym
from dqn_agent import DQNAgent
from utils import plot_learning_curve
from gym import wrappers
import wandb

In [None]:
def new_reward(pos):
  """Shaped reward computed from a position value.

  Returns 10 once ``pos`` is at least 0.5. Otherwise returns a linear
  rescaling of ``pos`` that maps -1.2 to -1 and 0.6 to 0, so values below
  the 0.5 threshold always yield a negative reward.
  """
  goal_reached = pos >= 0.5
  return 10 if goal_reached else (pos+1.2)/1.8-1

In [None]:
def run_dqn(n_episodes=2000, load_checkpoint=False, render=False):
  """Train a DQN agent on Acrobot-v1 inside a single wandb run.

  Meant to be handed to ``wandb.agent``, which calls it without arguments.
  Hyperparameters are read from ``wandb.config``; ``config_defaults`` holds
  the values passed to ``wandb.init`` as the default configuration.

  Args:
    n_episodes: Number of episodes to play.
    load_checkpoint: If True, call ``agent.load_models()`` first and skip
      storing transitions and learning during the episodes.
    render: If True, wrap the environment in ``wrappers.Monitor`` (recording
      every episode into the ``env_name + "video"`` directory) and render
      each step.

  Once per episode, logs ``score``, ``n_steps`` and ``mean_score`` (mean of
  the most recent 100 episode scores) to wandb and prints a summary line.
  """
  config_defaults = {
    "l_size":50,
    "mem_size":100000,
    "lr":5e-4,
    "eps_dec":1e-5,
    "replace":20,
    "batch_size":64,
    "gamma":0.99
    }
  wandb.init(config=config_defaults)
  config = wandb.config
  env_name = 'Acrobot-v1'
  env = gym.make(env_name)
  print(env.observation_space)
  print(env.action_space)

  # The three hidden-layer widths are derived from the single l_size setting.
  agent = DQNAgent(gamma=config.gamma, epsilon=1, lr=config.lr,
                    input_dims=(env.observation_space.shape),
                    n_actions=env.action_space.n, mem_size=config.mem_size, eps_min=0.1,
                    batch_size=config.batch_size, replace=config.replace, eps_dec=config.eps_dec,
                    chkpt_dir='models/', algo='DQNAgent',
                    env_name=env_name,fc1_dims=config.l_size,fc2_dims=round(config.l_size/2),
                    fc3_dims=round(config.l_size/4))

  if load_checkpoint:
    agent.load_models()

  print('name', wandb.run.name)

  if render:
    env = wrappers.Monitor(env, env_name+"video",
                      video_callable=lambda episode_id: True, force=True)

  best_score = -np.inf
  scores = []
  try:
    for i in range(n_episodes):
      done = False
      observation = env.reset()
      score = 0
      n_steps = 0
      while not done:
        if render:
          env.render()
        action = agent.choose_action(observation)
        observation_, reward, done, info = env.step(action)
        score += reward

        if not load_checkpoint:
          agent.store_transition(observation, action,
                                 reward, observation_, done)
          agent.learn()
        observation = observation_
        n_steps += 1
      scores.append(score)

      avg_score = np.mean(scores[-100:])
      # Update before printing so the summary line shows the current best.
      best_score = max(best_score, avg_score)

      # A single log call per episode keeps all three metrics on the same
      # wandb step (every wandb.log call advances the step by default).
      wandb.log({"score": score, "n_steps": n_steps, "mean_score": avg_score})

      print('episode: ', i,'score: ', score,
            ' average score %.1f' % avg_score, 'best score %.2f' % best_score,
            'epsilon %.2f' % agent.epsilon, 'steps', n_steps)
  finally:
    # Always release the environment (and the Monitor wrapper when recording).
    env.close()

In [None]:
# Bayesian hyperparameter sweep for the Acrobot training function.
# The objective is "mean_score" (the running mean over the last 100 episodes
# logged by run_dqn) rather than the raw per-episode "score", which is a single
# noisy sample; this also matches the objective of the MountainCar sweep.
sweep_config = {"name":"acrobat_sweep",
                "method": "bayes",
                "metric": {
                'name': 'mean_score',
                'goal': 'maximize'
              },
                "parameters":
                {
                    # Width of the first hidden layer; later layers are derived from it.
                    "l_size":{
                        "values":[50,75,100]
                    },
                    "mem_size":{
                        "values":[50000, 100000]
                    },
                    "lr":{
                        "values":[1e-4,1e-5,8e-6]
                    },
                    "eps_dec":{
                        "values":[1e-5,7e-6,5e-6]
                    },
                    "replace":{
                        "values":[800,1000,1500]
                    },
                    "batch_size":{
                        "values":[32,64]
                    },
                    "gamma":{
                        "values":[0.9,0.99,0.999]
                    }
                }
                }
# Register the sweep; the returned id is what wandb.agent needs to pull runs.
sweep_id = wandb.sweep(sweep_config,entity="viswa_ee",project="dqn_pa2_v2")

Create sweep with ID: 5pczi6tm
Sweep URL: https://wandb.ai/viswa_ee/dqn_pa2_v2/sweeps/5pczi6tm


In [None]:
# Execute up to 12 runs of run_dqn with configurations drawn from the sweep.
wandb.agent(
    sweep_id,
    function=run_dqn,
    count=12,
    entity="viswa_ee",
    project="dqn_pa2_v2",
)

In [None]:
def run_dqn(n_episodes=10000, load_checkpoint=False, render=False):
  """Train a DQN agent on MountainCar-v0 inside a single wandb run.

  Meant to be handed to ``wandb.agent``, which calls it without arguments.
  Hyperparameters are read from ``wandb.config``; ``config_defaults`` holds
  the values passed to ``wandb.init`` as the default configuration.

  Args:
    n_episodes: Number of episodes to play.
    load_checkpoint: If True, call ``agent.load_models()`` first and skip
      storing transitions and learning during the episodes.
    render: If True, wrap the environment in ``wrappers.Monitor`` (recording
      every episode into the ``env_name + "video"`` directory) and render
      each step.

  Once per episode, logs ``score``, ``n_steps`` and ``mean_score`` (mean of
  the most recent 100 episode scores) to wandb and prints a summary line.
  """
  config_defaults = {
    "l_size":50,
    "mem_size":100000,
    "lr":5e-4,
    "eps_dec":1e-5,
    "replace":20,
    "batch_size":64,
    "gamma":0.99
    }
  wandb.init(config=config_defaults)
  config = wandb.config
  env_name = 'MountainCar-v0'
  env = gym.make(env_name)
  # Raise the per-episode step cap to 1000 by overriding the attribute on the
  # wrapper returned by gym.make (presumably gym's TimeLimit - confirm).
  env._max_episode_steps = 1000
  print(env.observation_space)
  print(env.action_space)

  # The three hidden-layer widths are derived from the single l_size setting.
  agent = DQNAgent(gamma=config.gamma, epsilon=1, lr=config.lr,
                    input_dims=(env.observation_space.shape),
                    n_actions=env.action_space.n, mem_size=config.mem_size, eps_min=0.1,
                    batch_size=config.batch_size, replace=config.replace, eps_dec=config.eps_dec,
                    chkpt_dir='models/', algo='DQNAgent',
                    env_name=env_name,fc1_dims=config.l_size,fc2_dims=round(config.l_size/2),
                    fc3_dims=round(config.l_size/4))

  if load_checkpoint:
    agent.load_models()

  print('name', wandb.run.name)

  if render:
    env = wrappers.Monitor(env, env_name+"video",
                      video_callable=lambda episode_id: True, force=True)

  best_score = -np.inf
  scores = []
  try:
    for i in range(n_episodes):
      done = False
      observation = env.reset()
      score = 0
      n_steps = 0
      while not done:
        if render:
          env.render()
        # Ask the agent for its action; a hard-coded action here would mean
        # the learned policy is never exercised.
        action = agent.choose_action(observation)
        observation_, reward, done, info = env.step(action)
        # The raw environment reward is used; new_reward() shaping is not
        # applied in this run.
        score += reward

        if not load_checkpoint:
          agent.store_transition(observation, action,
                                 reward, observation_, done)
          agent.learn()
        observation = observation_
        n_steps += 1
      scores.append(score)

      avg_score = np.mean(scores[-100:])
      # Update before printing so the summary line shows the current best.
      best_score = max(best_score, avg_score)

      # A single log call per episode keeps all three metrics on the same
      # wandb step (every wandb.log call advances the step by default).
      wandb.log({"score": score, "n_steps": n_steps, "mean_score": avg_score})

      print('episode: ', i,'score: ', score,
            ' average score %.1f' % avg_score, 'best score %.2f' % best_score,
            'epsilon %.2f' % agent.epsilon, 'steps', n_steps)
  finally:
    # Always release the environment (and the Monitor wrapper when recording).
    env.close()

In [None]:
# Bayesian sweep definition for MountainCar: maximise the logged "mean_score"
# over a discrete grid of candidate values for each hyperparameter.
sweep_config = dict(
    name="mountain_car_sweep",
    method="bayes",
    metric=dict(name='mean_score', goal='maximize'),
    parameters={
        hyperparameter: {"values": candidates}
        for hyperparameter, candidates in (
            ("l_size", [50, 75, 100]),
            ("mem_size", [50000, 100000]),
            ("lr", [1e-4, 1e-5, 8e-6]),
            ("eps_dec", [1e-5, 7e-6, 5e-6]),
            ("replace", [800, 1000, 1500]),
            ("batch_size", [32, 64]),
            ("gamma", [0.9, 0.99, 0.999]),
        )
    },
)
# Register the sweep; the returned id is what wandb.agent needs to pull runs.
sweep_id = wandb.sweep(
    sweep_config,
    project="dqn_pa2_v2",
    entity="viswa_ee",
)

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Create sweep with ID: qd6at36w
Sweep URL: https://wandb.ai/viswa_ee/dqn_pa2_v2/sweeps/qd6at36w


In [None]:
# Execute up to 12 runs of run_dqn with configurations drawn from the sweep.
wandb.agent(
    sweep_id,
    function=run_dqn,
    count=12,
    entity="viswa_ee",
    project="dqn_pa2_v2",
)

[34m[1mwandb[0m: Agent Starting Run: jeh7l911 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	eps_dec: 1e-05
[34m[1mwandb[0m: 	gamma: 0.9
[34m[1mwandb[0m: 	l_size: 50
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	mem_size: 50000
[34m[1mwandb[0m: 	replace: 1000
[34m[1mwandb[0m: Currently logged in as: [33mviswa_ee[0m (use `wandb login --relogin` to force relogin)


Box([-1.2  -0.07], [0.6  0.07], (2,), float32)
Discrete(3)
name treasured-sweep-1
episode:  0 score:  -1000.0  average score -1000.0 best score -inf epsilon 0.99 steps 1000
episode:  1 score:  -1000.0  average score -1000.0 best score -1000.00 epsilon 0.98 steps 1000
episode:  2 score:  -1000.0  average score -1000.0 best score -1000.00 epsilon 0.97 steps 1000
episode:  3 score:  -1000.0  average score -1000.0 best score -1000.00 epsilon 0.96 steps 1000
episode:  4 score:  -1000.0  average score -1000.0 best score -1000.00 epsilon 0.95 steps 1000
episode:  5 score:  -1000.0  average score -1000.0 best score -1000.00 epsilon 0.94 steps 1000
episode:  6 score:  -1000.0  average score -1000.0 best score -1000.00 epsilon 0.93 steps 1000
episode:  7 score:  -1000.0  average score -1000.0 best score -1000.00 epsilon 0.92 steps 1000
episode:  8 score:  -1000.0  average score -1000.0 best score -1000.00 epsilon 0.91 steps 1000
episode:  9 score:  -1000.0  average score -1000.0 best score -1000

In [None]:
from IPython.display import HTML
from base64 import b64encode
 
def show_video(video_path, video_width = 600):
  """Return an inline HTML5 video player for a local video file.

  The file is read in full, base64-encoded and embedded as a
  ``data:video/mp4`` URL, so the returned object carries the video itself.

  Args:
    video_path: Path of the video file to embed.
    video_width: Value written into the ``width`` attribute of the
      ``<video>`` tag.

  Returns:
    An ``IPython.display.HTML`` object wrapping the ``<video>`` element.

  Raises:
    OSError: If the file cannot be opened or read.
  """
  # Read-only binary mode is sufficient (no write permission needed), and the
  # context manager closes the handle as soon as the bytes are read.
  with open(video_path, "rb") as video_handle:
    video_file = video_handle.read()
  video_url = f"data:video/mp4;base64,{b64encode(video_file).decode()}"
  return HTML(f"""<video width={video_width} controls><source src="{video_url}"></video>""")

In [None]:
# Embed one recorded Acrobot episode; the path is relative to the working
# directory, and the returned HTML object is displayed as the cell output.
show_video('./acrobot-video/openaigym.video.0.872.video000019.mp4')