In [1]:
import sys
import pdb
import gym
import math
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F

from tensorboardX import SummaryWriter
from BayesianNetwork import BayesianNetwork
from BayesianQNetwork import BQN_learn

In [2]:
# Name of the task to train on. The configuration cell further down branches on
# this string and recognises "CartPole", "Pendulum" and "MountainCar".
envt = "CartPole"

In [3]:
# Device that the networks are moved to: the GPU when CUDA is usable,
# otherwise the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

In [4]:
def test_agent(agent):
    
    count = 0    
    test_episode_rew = 0
    test_return = []
    
    done = False
    agent.dbqn.eval()
    
    obs = env.reset()
    act = agent.act(obs, use_sample=False, num_sample=0)
    
    while count <= 99:
        if done:
            test_return.append(test_episode_rew)
            test_episode_rew = 0
            count = count + 1
            
            obs = env.reset()
            act = agent.act(obs, use_sample=False, num_sample=0)
        
        obs1, rew, done, _ = env.step(act)       
        act = agent.act(obs1, use_sample=False, num_sample=0)        
        test_episode_rew = test_episode_rew + rew        
    
    agent.dbqn.train()
    return np.mean(np.array(test_return))

In [5]:
# Optimiser / learning hyper-parameters shared by every environment.
lr = 1e-2
batch_size = 64
gamma = 0.9

# Per-environment settings, keyed by the value of `envt`.
#   env_id        : id handed to gym.make
#   steps         : upper bound on agent steps for one training run
#   buffer_size   : handed to BQN_learn as the buffer size
#   features_list : handed to BayesianNetwork as its first argument
#                   (presumably the layer widths, input -> hidden -> output;
#                   confirm against BayesianNetwork)
ENV_SETTINGS = {
    "CartPole": {
        "env_id": 'CartPole-v0',
        "steps": 20000,
        "buffer_size": 20000,
        "features_list": [4, 32, 2],
    },
    "Pendulum": {
        "env_id": 'Pendulum-v0',
        "steps": 200000,
        "buffer_size": 50000,
        # NOTE(review): gym's Pendulum-v0 reports a 3-element observation and a
        # continuous action; confirm that a first entry of 2 and 5 outputs are
        # what the agent expects before training on this environment.
        "features_list": [2, 32, 5],
    },
    "MountainCar": {
        "env_id": 'MountainCar-v0',
        "steps": 200000,
        "buffer_size": 50000,
        "features_list": [2, 32, 3],
    },
}

# Fail loudly on an unknown name instead of leaving `steps`, `env`, ... undefined
# (or silently reusing values left over from an earlier kernel state).
if envt not in ENV_SETTINGS:
    raise ValueError(
        "Unknown environment {!r}; expected one of {}".format(envt, sorted(ENV_SETTINGS))
    )

_settings = ENV_SETTINGS[envt]
steps = _settings["steps"]
buffer_size = _settings["buffer_size"]
features_list = list(_settings["features_list"])
# Only the selected environment is instantiated.
env = gym.make(_settings["env_id"])

In [6]:
# Number of independent training runs; the per-episode returns of each run are
# collected as one list inside `run_result`.
runs = 5
max_episodes = 300   # cap on training episodes per run
test_every = 25      # evaluate with test_agent after every this many episodes
run_result = []

for run in range(runs):

    # One log directory per run: sharing a single directory would make every
    # run write the same scalar tags into the same place, so the curves of
    # different runs could not be told apart.
    writer = SummaryWriter('results/Cartpole1/run_{}/'.format(run))

    try:
        # Fresh online and target networks plus a fresh agent for every run.
        dbqn = BayesianNetwork(features_list, 4, batch_size, steps).to(DEVICE)
        target_dbqn = BayesianNetwork(features_list, 4, batch_size, steps).to(DEVICE)
        agent = BQN_learn(dbqn, target_dbqn, gamma, lr, batch_size, buffer_size, writer)

        done = False

        episode_rew = 0
        episode_count = 0
        res = []

        obs = env.reset()
        act = agent.reset(obs)

        # Stop on whichever limit is hit first: the agent's own counter
        # `agent.t` (presumably its step count - confirm in BQN_learn) or the
        # episode cap. An episode still in progress at that point is not recorded.
        while agent.t <= steps and episode_count < max_episodes:

            if done:
                print("Episode " + str(episode_count) + " with reward = " + str(episode_rew))
                writer.add_scalar('data/reward', episode_rew, episode_count)
                res.append(episode_rew)
                episode_rew = 0
                episode_count = episode_count + 1

                if episode_count % test_every == 0:
                    # test_agent steps the shared `env`; the reset just below
                    # puts it back into a fresh state for training.
                    test_result = test_agent(agent)
                    print("Test Result = " + str(test_result))
                    writer.add_scalar('data/test_reward', test_result, episode_count)

                # (A per-episode learning-rate decay on agent.optimizer was
                # sketched here previously but is disabled.)

                obs = env.reset()
                act = agent.reset(obs)

            obs1, rew, done, _ = env.step(act)
            # The agent consumes the transition and returns the next action.
            act = agent.step(obs, act, rew, obs1, done)
            obs = obs1
            episode_rew = episode_rew + rew

        # Per-run file name so a later run does not overwrite an earlier export.
        writer.export_scalars_to_json("./all_scalars_run_{}.json".format(run))
    finally:
        # Always release the event file, even if training raised.
        writer.close()

    run_result.append(res)

Episode 0 with reward = 23.0
Episode 1 with reward = 9.0
Episode 2 with reward = 10.0
Episode 3 with reward = 15.0
Episode 4 with reward = 22.0
Episode 5 with reward = 28.0
Episode 6 with reward = 19.0
Episode 7 with reward = 20.0
Episode 8 with reward = 16.0
Episode 9 with reward = 12.0
Episode 10 with reward = 13.0
Episode 11 with reward = 28.0
Episode 12 with reward = 13.0
Episode 13 with reward = 47.0
Episode 14 with reward = 26.0
Episode 15 with reward = 10.0
Episode 16 with reward = 45.0
Episode 17 with reward = 25.0
Episode 18 with reward = 68.0
Episode 19 with reward = 37.0
Episode 20 with reward = 15.0
Episode 21 with reward = 22.0
Episode 22 with reward = 12.0
Episode 23 with reward = 22.0
Episode 24 with reward = 22.0
Test Result = 13.3
Episode 25 with reward = 14.0
Episode 26 with reward = 15.0
Episode 27 with reward = 29.0
Episode 28 with reward = 13.0
Episode 29 with reward = 21.0
Episode 30 with reward = 14.0
Episode 31 with reward = 15.0
Episode 32 with reward = 21.0
Ep

Episode 262 with reward = 46.0
Episode 263 with reward = 156.0
Episode 264 with reward = 200.0
Episode 265 with reward = 77.0
Episode 266 with reward = 89.0
Episode 267 with reward = 166.0
Episode 268 with reward = 47.0
Episode 269 with reward = 122.0
Episode 270 with reward = 114.0
Episode 271 with reward = 136.0
Episode 272 with reward = 164.0
Episode 273 with reward = 127.0
Episode 274 with reward = 29.0
Test Result = 165.79
Episode 275 with reward = 48.0
Episode 276 with reward = 187.0
Episode 277 with reward = 194.0
Episode 278 with reward = 200.0
Episode 279 with reward = 197.0
Episode 280 with reward = 200.0
Episode 281 with reward = 200.0
Episode 282 with reward = 200.0
Episode 283 with reward = 197.0
Episode 284 with reward = 200.0
Episode 285 with reward = 200.0
Episode 286 with reward = 200.0
Episode 287 with reward = 172.0
Episode 288 with reward = 200.0
Episode 289 with reward = 194.0
Episode 290 with reward = 200.0
Episode 291 with reward = 200.0
Episode 292 with reward =

Episode 223 with reward = 38.0
Episode 224 with reward = 77.0
Test Result = 72.82
Episode 225 with reward = 52.0
Episode 226 with reward = 19.0
Episode 227 with reward = 39.0
Episode 228 with reward = 30.0
Episode 229 with reward = 28.0
Episode 230 with reward = 61.0
Episode 231 with reward = 45.0
Episode 232 with reward = 31.0
Episode 233 with reward = 47.0
Episode 234 with reward = 61.0
Episode 235 with reward = 35.0
Episode 236 with reward = 36.0
Episode 237 with reward = 29.0
Episode 238 with reward = 45.0
Episode 239 with reward = 25.0
Episode 240 with reward = 47.0
Episode 241 with reward = 81.0
Episode 242 with reward = 54.0
Episode 243 with reward = 41.0
Episode 244 with reward = 36.0
Episode 245 with reward = 61.0
Episode 246 with reward = 101.0
Episode 247 with reward = 106.0
Episode 248 with reward = 122.0
Episode 249 with reward = 87.0
Test Result = 103.75
Episode 250 with reward = 70.0
Episode 251 with reward = 57.0
Episode 252 with reward = 48.0
Episode 253 with reward = 

Episode 183 with reward = 60.0
Episode 184 with reward = 48.0
Episode 185 with reward = 47.0
Episode 186 with reward = 26.0
Episode 187 with reward = 16.0
Episode 188 with reward = 42.0
Episode 189 with reward = 50.0
Episode 190 with reward = 18.0
Episode 191 with reward = 19.0
Episode 192 with reward = 33.0
Episode 193 with reward = 32.0
Episode 194 with reward = 15.0
Episode 195 with reward = 63.0
Episode 196 with reward = 71.0
Episode 197 with reward = 56.0
Episode 198 with reward = 28.0
Episode 199 with reward = 33.0
Test Result = 57.88
Episode 200 with reward = 80.0
Episode 201 with reward = 41.0
Episode 202 with reward = 17.0
Episode 203 with reward = 52.0
Episode 204 with reward = 30.0
Episode 205 with reward = 48.0
Episode 206 with reward = 24.0
Episode 207 with reward = 37.0
Episode 208 with reward = 13.0
Episode 209 with reward = 27.0
Episode 210 with reward = 27.0
Episode 211 with reward = 61.0
Episode 212 with reward = 18.0
Episode 213 with reward = 35.0
Episode 214 with re

Episode 144 with reward = 14.0
Episode 145 with reward = 39.0
Episode 146 with reward = 9.0
Episode 147 with reward = 20.0
Episode 148 with reward = 10.0
Episode 149 with reward = 20.0
Test Result = 17.38
Episode 150 with reward = 25.0
Episode 151 with reward = 17.0
Episode 152 with reward = 12.0
Episode 153 with reward = 11.0
Episode 154 with reward = 29.0
Episode 155 with reward = 25.0
Episode 156 with reward = 16.0
Episode 157 with reward = 28.0
Episode 158 with reward = 14.0
Episode 159 with reward = 13.0
Episode 160 with reward = 19.0
Episode 161 with reward = 20.0
Episode 162 with reward = 30.0
Episode 163 with reward = 15.0
Episode 164 with reward = 16.0
Episode 165 with reward = 24.0
Episode 166 with reward = 25.0
Episode 167 with reward = 77.0
Episode 168 with reward = 39.0
Episode 169 with reward = 12.0
Episode 170 with reward = 22.0
Episode 171 with reward = 24.0
Episode 172 with reward = 43.0
Episode 173 with reward = 42.0
Episode 174 with reward = 79.0
Test Result = 20.49


Episode 104 with reward = 19.0
Episode 105 with reward = 36.0
Episode 106 with reward = 47.0
Episode 107 with reward = 36.0
Episode 108 with reward = 39.0
Episode 109 with reward = 24.0
Episode 110 with reward = 45.0
Episode 111 with reward = 34.0
Episode 112 with reward = 63.0
Episode 113 with reward = 38.0
Episode 114 with reward = 16.0
Episode 115 with reward = 36.0
Episode 116 with reward = 24.0
Episode 117 with reward = 17.0
Episode 118 with reward = 62.0
Episode 119 with reward = 40.0
Episode 120 with reward = 32.0
Episode 121 with reward = 32.0
Episode 122 with reward = 54.0
Episode 123 with reward = 44.0
Episode 124 with reward = 84.0
Test Result = 54.44
Episode 125 with reward = 102.0
Episode 126 with reward = 46.0
Episode 127 with reward = 20.0
Episode 128 with reward = 35.0
Episode 129 with reward = 35.0
Episode 130 with reward = 200.0
Episode 131 with reward = 33.0
Episode 132 with reward = 71.0
Episode 133 with reward = 15.0
Episode 134 with reward = 48.0
Episode 135 with 