# Install gym-0.21.0 and highway environment

In [None]:
!pip uninstall gym
!pip install gym==0.21.0
!pip install highway-env

Found existing installation: gym 0.17.3
Uninstalling gym-0.17.3:
  Would remove:
    /usr/local/lib/python3.7/dist-packages/gym-0.17.3.dist-info/*
    /usr/local/lib/python3.7/dist-packages/gym/*
Proceed (y/n)? y
  Successfully uninstalled gym-0.17.3
Collecting gym==0.21.0
  Downloading gym-0.21.0.tar.gz (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 5.3 MB/s 
Building wheels for collected packages: gym
  Building wheel for gym (setup.py) ... [?25l[?25hdone
  Created wheel for gym: filename=gym-0.21.0-py3-none-any.whl size=1616826 sha256=a8347f89c28c5e8364ff4cdda3b9194c36a760ef8706a5e868e840f884f0102a
  Stored in directory: /root/.cache/pip/wheels/76/ee/9c/36bfe3e079df99acf5ae57f4e3464ff2771b34447d6d2f2148
Successfully built gym
Installing collected packages: gym
Successfully installed gym-0.21.0


Collecting highway-env
  Downloading highway_env-1.5-py3-none-any.whl (103 kB)
[?25l[K     |███▏                            | 10 kB 23.7 MB/s eta 0:00:01[K     |██████▍                         | 20 kB 18.3 MB/s eta 0:00:01[K     |█████████▌                      | 30 kB 9.9 MB/s eta 0:00:01[K     |████████████▊                   | 40 kB 8.1 MB/s eta 0:00:01[K     |████████████████                | 51 kB 4.4 MB/s eta 0:00:01[K     |███████████████████             | 61 kB 5.2 MB/s eta 0:00:01[K     |██████████████████████▎         | 71 kB 5.3 MB/s eta 0:00:01[K     |█████████████████████████▌      | 81 kB 5.4 MB/s eta 0:00:01[K     |████████████████████████████▋   | 92 kB 5.9 MB/s eta 0:00:01[K     |███████████████████████████████▉| 102 kB 6.5 MB/s eta 0:00:01[K     |████████████████████████████████| 103 kB 6.5 MB/s 
Collecting pygame
  Downloading pygame-2.1.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (21.8 MB)
[K     |█████████████████████████████

# Mount the drive

In [1]:
import sys
import os
# Mount Google Drive and expose the project folder to the import system.
# Only the absence of the google.colab package is treated as "not on Colab";
# genuine mount errors are allowed to surface instead of being hidden.
try:
  from google.colab import drive
except ImportError:
  print("Run only for google colab")
else:
  drive.mount('/content/gdrive/')
  project_path = 'ENPM690/Project/DQN_highway_env'
  project_dir = os.path.join('/content/gdrive/MyDrive', project_path)
  # Guard the append so re-running this cell does not duplicate the entry.
  if project_dir not in sys.path:
    sys.path.append(project_dir)

Mounted at /content/gdrive/


# Imports

In [None]:
from common_utils import *
from models.dqn_conv_v1 import DQN as DQN
from train import *

In [None]:
def main():
    """Train a DQN agent and log per-episode statistics to CSV files.

    Options come from ``parse_opts()``. The function builds the environment
    manager, the epsilon-greedy agent, the replay memory and a policy/target
    network pair, then calls ``train_epoch`` once per episode for
    ``opt.num_episodes`` episodes.

    Files written (all CSVs share the timestamp taken at the start of the run):
      * ``<save_folder>/<timestamp>_mv.csv``: one row per episode holding the
        last value returned by ``get_moving_average`` over episode durations.
      * ``<save_folder>/<timestamp>_reward.csv``: one row per episode holding
        the reward returned by ``train_epoch``.
      * ``<save_folder>/<timestamp>_duration.csv``: a single row with every
        episode duration, written after the loop completes.
      * ``<save_folder>/<env>/*.pth``: a checkpoint with the policy network and
        optimizer state every ``opt.save_interval`` episodes.
    """
    opt = parse_opts()
    print(opt)

    # makedirs creates save_folder and the per-environment sub-folder in one
    # call and is a no-op when they already exist.
    checkpoint_dir = os.path.join(opt.save_folder, opt.env)
    os.makedirs(checkpoint_dir, exist_ok=True)

    # One timestamp for the whole run so the three CSV files share a prefix;
    # checkpoints get their own timestamp at save time.
    run_timestamp = time.strftime('%b-%d-%Y_%H%M', time.localtime())
    mv_path = os.path.join(opt.save_folder, f'{run_timestamp}_mv.csv')
    reward_path = os.path.join(opt.save_folder, f'{run_timestamp}_reward.csv')
    duration_path = os.path.join(opt.save_folder, f'{run_timestamp}_duration.csv')

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    em = HighwayEnvManager(device)
    strategy = EpsilonGreedyStrategy(opt.eps_start, opt.eps_end, opt.eps_decay)
    agent = Agent(strategy, em.num_actions_available(), device)
    memory = ReplayMemory(opt.memory_size)

    policy_net = DQN(em.get_screen_height(), em.get_screen_width(), em.get_screen_stack(), em.num_actions_available()).to(device)
    target_net = DQN(em.get_screen_height(), em.get_screen_width(), em.get_screen_stack(), em.num_actions_available()).to(device)
    # The target network starts as a copy of the policy network and is kept in
    # eval mode; it is only refreshed by the periodic load_state_dict below.
    target_net.load_state_dict(policy_net.state_dict())
    target_net.eval()

    optimizer = optim.Adam(params=policy_net.parameters(), lr=opt.lr)
    criterion = nn.MSELoss()

    moving_avg_period = 50
    episode_durations = []

    # The context manager closes both log files even if training raises or is
    # interrupted. newline='' is what the csv module expects for its files.
    with open(mv_path, 'w', newline='') as f, open(reward_path, 'w', newline='') as f_reward:
        writer = csv.writer(f)
        writer_reward = csv.writer(f_reward)

        for episode in range(opt.num_episodes):
            duration, reward = train_epoch(opt, em, agent, policy_net, target_net, memory, device, optimizer, criterion)
            episode_durations.append(duration)
            writer_reward.writerow([reward])

            moving_avg = get_moving_average(moving_avg_period, episode_durations)
            latest_avg = moving_avg[-1]
            print("Episode", episode, "\n",
                  moving_avg_period, "episode moving avg: ", latest_avg, " | currect episode reward: ", reward)

            writer.writerow([latest_avg])

            # Sync the target network with the policy network every
            # opt.target_update episodes (including episode 0).
            if episode % opt.target_update == 0:
                target_net.load_state_dict(policy_net.state_dict())

            # Checkpoint on the last episode of each save_interval window.
            if episode % opt.save_interval == opt.save_interval - 1:
                state = {'epoch': episode, 'state_dict': policy_net.state_dict(), 'optimizer_state_dict': optimizer.state_dict()}
                save_timestamp = time.strftime('%b-%d-%Y_%H%M', time.localtime())
                torch.save(state, os.path.join(checkpoint_dir,
                                               f'{opt.env}-Epoch-{episode}-Duration-{latest_avg}_{save_timestamp}.pth'))
                print("Model saved with average duration ", latest_avg)

    with open(duration_path, 'w', newline='') as f_duration:
        csv.writer(f_duration).writerow(episode_durations)

In [None]:
# Entry point: run the training routine defined in main() above.
main()

Namespace(batch_size=64, env='highway-v0', eps_decay=0.001, eps_end=0.01, eps_start=1, f='/root/.local/share/jupyter/runtime/kernel-07c11c0a-687f-40fd-8a4b-8de12f011977.json', gamma=0.999, lr=0.00025, memory_size=100000, model_name='highway-v0-Epoch-1199-Duration-17.84000015258789_Apr-19-2022_0315.pth', num_episodes=3000, save_folder='/content/gdrive/MyDrive/ENPM690/Project/DQN_highway_env/snapshots', save_interval=100, target_update=10)
Episode 0 
 50 episode moving avg:  7.0  | currect episode reward:  0.2116312027067536
Episode 1 
 50 episode moving avg:  5.0  | currect episode reward:  0.04444444444444443
Episode 2 
 50 episode moving avg:  12.333333  | currect episode reward:  0.022222222222222216
Episode 3 
 50 episode moving avg:  10.5  | currect episode reward:  0.04444444444444443
Episode 4 
 50 episode moving avg:  9.2  | currect episode reward:  0.022222222222222216
Episode 5 
 50 episode moving avg:  8.166667  | currect episode reward:  9.069537356813188e-05
Episode 6 
 50 