In [2]:
import math
import datetime
import os, sys
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple
from itertools import count
from IPython.display import Audio
import csv

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
from torchsummary import summary

import cv2




# local files
sys.path.insert(0, '../')
import pyClient
import utils
import model
from model import Transition

In [3]:
# Training parameters
BATCH_SIZE = 128 #original 128
GAMMA = 0.999              # discount factor handed to the agent
EPS_START = 0.95           # initial exploration rate
EPS_END = 0.05             # final exploration rate
EPS_DECAY_steps = 4000     # number of steps over which epsilon goes from EPS_START to EPS_END
EPS_DECAY = (EPS_START - EPS_END)/EPS_DECAY_steps  # linear per-step epsilon decrement
REPLAY_START_SIZE =  128 #TODO PUT BACK TO 1500 #steps taken
TARGET_UPDATE = 10 #episodes
MEMORY_CAPACITY = 12000    # capacity handed to the agent's replay memory
# Use the first GPU when one is present; otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA instead of failing at agent creation.
DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Environment parameters
IMSIZE = 128
STACK_SIZE = 1
N_ACTIONS = 3
IP  = "127.0.0.1" # Ip address that the TCP/IP interface listens to
PORT = 13000       # Port number that the TCP/IP interface listens to


# Client for the simulator reachable at IP:PORT, configured with size=IMSIZE.
environment = pyClient.Environment(ip = IP, port = PORT, size = IMSIZE)

# Double-DQN agent; all hyper-parameters come from the constants above.
agent = model.DoubleDQNAgent(imsize=IMSIZE,
                 in_channels=STACK_SIZE,
                 n_actions=N_ACTIONS,
                 memory_capacity=MEMORY_CAPACITY,
                 eps_start=EPS_START,
                 eps_end=EPS_END,
                 eps_delta=EPS_DECAY,
                 gamma_discount = GAMMA,
                 batch_size = BATCH_SIZE,
                 device=DEVICE)



In [4]:
# Utilities
def process_state(state_raw):
    """Turn a raw environment state into a single-frame network input.

    The raw state is decoded with ``environment.state2usableArray``, converted
    from BGR to grayscale, cast to float32 and divided by 255 (presumably the
    decoded frame is 8-bit, which would put the result in [0, 1] -- TODO confirm).

    Args:
        state_raw: raw state as returned by ``environment.reset`` /
            ``environment.step``.

    Returns:
        A ``torch.Tensor`` viewed as ``(1, 1, environment.size, environment.size)``.
        The tensor is not moved to any device here; callers do that themselves.
    """
    bgr_image = environment.state2usableArray(state_raw)
    gray_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY).astype('float32')
    scaled = gray_image / 255.
    side = environment.size
    # Add batch and channel dimensions in front of the square image.
    return torch.Tensor(scaled).view(1, 1, side, side)

In [6]:
# Run limits and logging target
MAX_EPISODES = 1e5   # upper bound on the number of episodes (cast to int for range())
MAX_STEPS  = 1e7     # training stops once agent.step_count exceeds this
TRAINING_CONDITION = 0
LOGFILE = 'Experiments/Out/test-01.csv'

# Maps the end signal returned by environment.step() to whether the episode
# has to be terminated.
RESET_UPON_END_SIGNAL = {0:False,  # Nothing happened
                         1:True,   # Box collision
                         2:True,   # Wall collision
                         3:False}  # Reached step target

# Column order of the log file; every row written below follows this order.
LOG_COLUMNS = ['episode','step_count', 'train_loss', 'reward']

# Create the output folder up front so opening the log file cannot fail on a
# fresh checkout.
log_dir = os.path.dirname(LOGFILE)
if log_dir:
    os.makedirs(log_dir, exist_ok=True)

# newline='' is what the csv module expects; without it extra blank rows can
# appear on platforms that translate line endings.
with open(LOGFILE, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile, delimiter=',',quotechar='|', quoting=csv.QUOTE_MINIMAL)
    writer.writerow(LOG_COLUMNS)

lr_dqn = 0.01
optimizer = optim.Adam(agent.policy_net.parameters(), lr = lr_dqn)

# NOTE: both totals are never reset, so the logged values are running totals
# over the whole run, not per-episode figures.
total_reward = 0
total_loss = 0

for episode in range(int(MAX_EPISODES)):
    # Log the totals accumulated so far. The file is reopened in append mode on
    # every episode, so each row is closed out before the episode starts.
    with open(LOGFILE, 'a', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        # Order matches LOG_COLUMNS (train_loss before reward).
        writer.writerow([episode, agent.step_count, total_loss, total_reward])

    if agent.step_count > MAX_STEPS:
        break

    if episode % TARGET_UPDATE == 0:  #episodes
        print('episode {}, target net updated'.format(episode))
        # Must be *called*: a bare `agent.update_target_net` only looks the
        # attribute up and leaves the target network untouched.
        # NOTE(review): assumes update_target_net is a regular method in model.py.
        agent.update_target_net()

    _, _, state_raw = environment.reset(TRAINING_CONDITION)
    state = process_state(state_raw).to(DEVICE)

    for t in count():
        action = agent.select_action(state)
        end, reward, next_state_raw = environment.step(action.item())
        episode_over = RESET_UPON_END_SIGNAL[end]

        # Terminal transitions are stored with next_state=None.
        next_state = None if episode_over else process_state(next_state_raw).to(DEVICE)

        # NOTE(review): rewards above 100 are mapped to -(reward - 100); this
        # looks like a wire encoding for negative rewards -- confirm against
        # the environment protocol.
        if reward > 100:
            reward = -(reward -100)
        reward = torch.tensor([reward], device=DEVICE,dtype=torch.float)
        total_reward += reward.item()

        action = action.unsqueeze(0)
        agent.memory.push(state, action, next_state, reward)

        if episode_over:
            break

        # Advance to the new observation. Without this the agent keeps acting
        # on (and storing transitions from) the first frame of the episode.
        state = next_state

        if len(agent.memory) > REPLAY_START_SIZE:

            state_action_values, expected_state_action_values = agent.forward()

            # Compute Huber loss
            loss = F.smooth_l1_loss(state_action_values, expected_state_action_values)
            total_loss += loss.item()

            # Optimize the model
            optimizer.zero_grad()
            loss.backward()

            # Clip the gradient norm of the policy network to 1 before stepping.
            nn.utils.clip_grad_norm_(agent.policy_net.parameters(), 1)

            optimizer.step()
        else:
            # Replay memory not filled far enough yet: keep the step counter at
            # zero (presumably so epsilon decay and the MAX_STEPS budget only
            # start once learning does -- TODO confirm in model.py).
            agent.step_count = 0

episode 0, target net updated
episode 10, target net updated
episode 20, target net updated
episode 30, target net updated
episode 40, target net updated
episode 50, target net updated
episode 60, target net updated
episode 70, target net updated
episode 80, target net updated
episode 90, target net updated
episode 100, target net updated
episode 110, target net updated
episode 120, target net updated
episode 130, target net updated
episode 140, target net updated
episode 150, target net updated
episode 160, target net updated
episode 170, target net updated
episode 180, target net updated
episode 190, target net updated
episode 200, target net updated
episode 210, target net updated
episode 220, target net updated
episode 230, target net updated
episode 240, target net updated
episode 250, target net updated
episode 260, target net updated
episode 270, target net updated
episode 280, target net updated
episode 290, target net updated
episode 300, target net updated
episode 310, target

episode 2520, target net updated
episode 2530, target net updated
episode 2540, target net updated
episode 2550, target net updated
episode 2560, target net updated
episode 2570, target net updated
episode 2580, target net updated
episode 2590, target net updated
episode 2600, target net updated
episode 2610, target net updated
episode 2620, target net updated
episode 2630, target net updated
episode 2640, target net updated
episode 2650, target net updated
episode 2660, target net updated
episode 2670, target net updated
episode 2680, target net updated
episode 2690, target net updated
episode 2700, target net updated
episode 2710, target net updated
episode 2720, target net updated
episode 2730, target net updated
episode 2740, target net updated
episode 2750, target net updated
episode 2760, target net updated
episode 2770, target net updated
episode 2780, target net updated
episode 2790, target net updated
episode 2800, target net updated
episode 2810, target net updated
episode 28

episode 5010, target net updated
episode 5020, target net updated
episode 5030, target net updated
episode 5040, target net updated
episode 5050, target net updated
episode 5060, target net updated
episode 5070, target net updated
episode 5080, target net updated
episode 5090, target net updated
episode 5100, target net updated
episode 5110, target net updated
episode 5120, target net updated
episode 5130, target net updated
episode 5140, target net updated
episode 5150, target net updated
episode 5160, target net updated
episode 5170, target net updated
episode 5180, target net updated
episode 5190, target net updated
episode 5200, target net updated
episode 5210, target net updated
episode 5220, target net updated
episode 5230, target net updated
episode 5240, target net updated
episode 5250, target net updated
episode 5260, target net updated
episode 5270, target net updated
episode 5280, target net updated
episode 5290, target net updated
episode 5300, target net updated
episode 53

episode 7500, target net updated
episode 7510, target net updated
episode 7520, target net updated
episode 7530, target net updated
episode 7540, target net updated
episode 7550, target net updated
episode 7560, target net updated
episode 7570, target net updated
episode 7580, target net updated
episode 7590, target net updated
episode 7600, target net updated
episode 7610, target net updated
episode 7620, target net updated
episode 7630, target net updated
episode 7640, target net updated
episode 7650, target net updated
episode 7660, target net updated
episode 7670, target net updated
episode 7680, target net updated
episode 7690, target net updated
episode 7700, target net updated
episode 7710, target net updated
episode 7720, target net updated
episode 7730, target net updated
episode 7740, target net updated
episode 7750, target net updated
episode 7760, target net updated
episode 7770, target net updated
episode 7780, target net updated
episode 7790, target net updated
episode 78

episode 9990, target net updated
episode 10000, target net updated
episode 10010, target net updated
episode 10020, target net updated
episode 10030, target net updated
episode 10040, target net updated
episode 10050, target net updated
episode 10060, target net updated
episode 10070, target net updated
episode 10080, target net updated
episode 10090, target net updated
episode 10100, target net updated
episode 10110, target net updated
episode 10120, target net updated
episode 10130, target net updated
episode 10140, target net updated
episode 10150, target net updated
episode 10160, target net updated
episode 10170, target net updated
episode 10180, target net updated
episode 10190, target net updated
episode 10200, target net updated
episode 10210, target net updated
episode 10220, target net updated
episode 10230, target net updated
episode 10240, target net updated
episode 10250, target net updated
episode 10260, target net updated
episode 10270, target net updated
episode 10280, 

episode 12400, target net updated
episode 12410, target net updated
episode 12420, target net updated
episode 12430, target net updated
episode 12440, target net updated
episode 12450, target net updated
episode 12460, target net updated
episode 12470, target net updated
episode 12480, target net updated
episode 12490, target net updated
episode 12500, target net updated
episode 12510, target net updated
episode 12520, target net updated
episode 12530, target net updated
episode 12540, target net updated
episode 12550, target net updated
episode 12560, target net updated
episode 12570, target net updated
episode 12580, target net updated
episode 12590, target net updated
episode 12600, target net updated
episode 12610, target net updated
episode 12620, target net updated
episode 12630, target net updated
episode 12640, target net updated
episode 12650, target net updated
episode 12660, target net updated
episode 12670, target net updated
episode 12680, target net updated
episode 12690,

episode 14810, target net updated
episode 14820, target net updated
episode 14830, target net updated
episode 14840, target net updated
episode 14850, target net updated
episode 14860, target net updated
episode 14870, target net updated
episode 14880, target net updated
episode 14890, target net updated
episode 14900, target net updated
episode 14910, target net updated
episode 14920, target net updated
episode 14930, target net updated
episode 14940, target net updated
episode 14950, target net updated
episode 14960, target net updated
episode 14970, target net updated
episode 14980, target net updated
episode 14990, target net updated
episode 15000, target net updated
episode 15010, target net updated
episode 15020, target net updated
episode 15030, target net updated
episode 15040, target net updated
episode 15050, target net updated
episode 15060, target net updated
episode 15070, target net updated
episode 15080, target net updated
episode 15090, target net updated
episode 15100,

episode 17220, target net updated
episode 17230, target net updated
episode 17240, target net updated
episode 17250, target net updated
episode 17260, target net updated
episode 17270, target net updated
episode 17280, target net updated
episode 17290, target net updated
episode 17300, target net updated
episode 17310, target net updated
episode 17320, target net updated
episode 17330, target net updated
episode 17340, target net updated
episode 17350, target net updated
episode 17360, target net updated
episode 17370, target net updated
episode 17380, target net updated
episode 17390, target net updated
episode 17400, target net updated
episode 17410, target net updated
episode 17420, target net updated
episode 17430, target net updated
episode 17440, target net updated
episode 17450, target net updated
episode 17460, target net updated
episode 17470, target net updated
episode 17480, target net updated
episode 17490, target net updated
episode 17500, target net updated
episode 17510,

episode 19630, target net updated
episode 19640, target net updated
episode 19650, target net updated
episode 19660, target net updated
episode 19670, target net updated
episode 19680, target net updated
episode 19690, target net updated
episode 19700, target net updated
episode 19710, target net updated
episode 19720, target net updated
episode 19730, target net updated
episode 19740, target net updated
episode 19750, target net updated
episode 19760, target net updated
episode 19770, target net updated
episode 19780, target net updated
episode 19790, target net updated
episode 19800, target net updated
episode 19810, target net updated
episode 19820, target net updated
episode 19830, target net updated
episode 19840, target net updated
episode 19850, target net updated
episode 19860, target net updated
episode 19870, target net updated
episode 19880, target net updated
episode 19890, target net updated
episode 19900, target net updated
episode 19910, target net updated
episode 19920,

episode 22040, target net updated
episode 22050, target net updated
episode 22060, target net updated
episode 22070, target net updated
episode 22080, target net updated
episode 22090, target net updated
episode 22100, target net updated
episode 22110, target net updated
episode 22120, target net updated
episode 22130, target net updated
episode 22140, target net updated
episode 22150, target net updated
episode 22160, target net updated
episode 22170, target net updated
episode 22180, target net updated
episode 22190, target net updated
episode 22200, target net updated
episode 22210, target net updated
episode 22220, target net updated
episode 22230, target net updated
episode 22240, target net updated
episode 22250, target net updated
episode 22260, target net updated
episode 22270, target net updated
episode 22280, target net updated
episode 22290, target net updated
episode 22300, target net updated
episode 22310, target net updated
episode 22320, target net updated
episode 22330,

episode 24450, target net updated
episode 24460, target net updated
episode 24470, target net updated
episode 24480, target net updated
episode 24490, target net updated
episode 24500, target net updated
episode 24510, target net updated
episode 24520, target net updated
episode 24530, target net updated
episode 24540, target net updated
episode 24550, target net updated
episode 24560, target net updated
episode 24570, target net updated
episode 24580, target net updated
episode 24590, target net updated
episode 24600, target net updated
episode 24610, target net updated
episode 24620, target net updated
episode 24630, target net updated
episode 24640, target net updated
episode 24650, target net updated
episode 24660, target net updated
episode 24670, target net updated
episode 24680, target net updated
episode 24690, target net updated
episode 24700, target net updated
episode 24710, target net updated
episode 24720, target net updated
episode 24730, target net updated
episode 24740,

episode 26860, target net updated
episode 26870, target net updated
episode 26880, target net updated
episode 26890, target net updated
episode 26900, target net updated
episode 26910, target net updated
episode 26920, target net updated
episode 26930, target net updated
episode 26940, target net updated
episode 26950, target net updated
episode 26960, target net updated
episode 26970, target net updated
episode 26980, target net updated
episode 26990, target net updated
episode 27000, target net updated
episode 27010, target net updated
episode 27020, target net updated
episode 27030, target net updated
episode 27040, target net updated
episode 27050, target net updated
episode 27060, target net updated
episode 27070, target net updated
episode 27080, target net updated
episode 27090, target net updated
episode 27100, target net updated
episode 27110, target net updated
episode 27120, target net updated
episode 27130, target net updated
episode 27140, target net updated
episode 27150,

episode 29270, target net updated
episode 29280, target net updated
episode 29290, target net updated
episode 29300, target net updated
episode 29310, target net updated
episode 29320, target net updated
episode 29330, target net updated
episode 29340, target net updated
episode 29350, target net updated
episode 29360, target net updated
episode 29370, target net updated
episode 29380, target net updated
episode 29390, target net updated
episode 29400, target net updated
episode 29410, target net updated
episode 29420, target net updated
episode 29430, target net updated
episode 29440, target net updated
episode 29450, target net updated
episode 29460, target net updated
episode 29470, target net updated
episode 29480, target net updated
episode 29490, target net updated
episode 29500, target net updated
episode 29510, target net updated
episode 29520, target net updated
episode 29530, target net updated
episode 29540, target net updated
episode 29550, target net updated
episode 29560,

episode 31680, target net updated
episode 31690, target net updated
episode 31700, target net updated
episode 31710, target net updated
episode 31720, target net updated
episode 31730, target net updated
episode 31740, target net updated
episode 31750, target net updated
episode 31760, target net updated
episode 31770, target net updated
episode 31780, target net updated
episode 31790, target net updated
episode 31800, target net updated
episode 31810, target net updated
episode 31820, target net updated
episode 31830, target net updated
episode 31840, target net updated
episode 31850, target net updated
episode 31860, target net updated
episode 31870, target net updated
episode 31880, target net updated
episode 31890, target net updated
episode 31900, target net updated
episode 31910, target net updated
episode 31920, target net updated
episode 31930, target net updated
episode 31940, target net updated
episode 31950, target net updated
episode 31960, target net updated
episode 31970,

episode 34090, target net updated
episode 34100, target net updated
episode 34110, target net updated
episode 34120, target net updated
episode 34130, target net updated
episode 34140, target net updated
episode 34150, target net updated
episode 34160, target net updated
episode 34170, target net updated
episode 34180, target net updated
episode 34190, target net updated
episode 34200, target net updated
episode 34210, target net updated
episode 34220, target net updated
episode 34230, target net updated
episode 34240, target net updated
episode 34250, target net updated
episode 34260, target net updated
episode 34270, target net updated
episode 34280, target net updated
episode 34290, target net updated
episode 34300, target net updated
episode 34310, target net updated
episode 34320, target net updated
episode 34330, target net updated
episode 34340, target net updated
episode 34350, target net updated
episode 34360, target net updated
episode 34370, target net updated
episode 34380,

episode 36500, target net updated
episode 36510, target net updated
episode 36520, target net updated
episode 36530, target net updated
episode 36540, target net updated
episode 36550, target net updated
episode 36560, target net updated
episode 36570, target net updated
episode 36580, target net updated
episode 36590, target net updated
episode 36600, target net updated
episode 36610, target net updated
episode 36620, target net updated
episode 36630, target net updated
episode 36640, target net updated
episode 36650, target net updated
episode 36660, target net updated
episode 36670, target net updated
episode 36680, target net updated
episode 36690, target net updated
episode 36700, target net updated
episode 36710, target net updated
episode 36720, target net updated
episode 36730, target net updated
episode 36740, target net updated
episode 36750, target net updated
episode 36760, target net updated
episode 36770, target net updated
episode 36780, target net updated
episode 36790,

episode 38910, target net updated
episode 38920, target net updated
episode 38930, target net updated
episode 38940, target net updated
episode 38950, target net updated
episode 38960, target net updated
episode 38970, target net updated
episode 38980, target net updated
episode 38990, target net updated
episode 39000, target net updated
episode 39010, target net updated
episode 39020, target net updated
episode 39030, target net updated
episode 39040, target net updated
episode 39050, target net updated
episode 39060, target net updated
episode 39070, target net updated
episode 39080, target net updated
episode 39090, target net updated
episode 39100, target net updated
episode 39110, target net updated
episode 39120, target net updated
episode 39130, target net updated
episode 39140, target net updated
episode 39150, target net updated
episode 39160, target net updated
episode 39170, target net updated
episode 39180, target net updated
episode 39190, target net updated
episode 39200,

episode 41320, target net updated
episode 41330, target net updated
episode 41340, target net updated
episode 41350, target net updated
episode 41360, target net updated
episode 41370, target net updated
episode 41380, target net updated
episode 41390, target net updated
episode 41400, target net updated
episode 41410, target net updated
episode 41420, target net updated
episode 41430, target net updated
episode 41440, target net updated
episode 41450, target net updated
episode 41460, target net updated
episode 41470, target net updated
episode 41480, target net updated
episode 41490, target net updated
episode 41500, target net updated
episode 41510, target net updated
episode 41520, target net updated
episode 41530, target net updated
episode 41540, target net updated
episode 41550, target net updated
episode 41560, target net updated
episode 41570, target net updated
episode 41580, target net updated
episode 41590, target net updated
episode 41600, target net updated
episode 41610,

episode 43730, target net updated
episode 43740, target net updated
episode 43750, target net updated
episode 43760, target net updated
episode 43770, target net updated
episode 43780, target net updated
episode 43790, target net updated
episode 43800, target net updated
episode 43810, target net updated
episode 43820, target net updated
episode 43830, target net updated
episode 43840, target net updated
episode 43850, target net updated
episode 43860, target net updated
episode 43870, target net updated
episode 43880, target net updated
episode 43890, target net updated
episode 43900, target net updated
episode 43910, target net updated
episode 43920, target net updated
episode 43930, target net updated
episode 43940, target net updated
episode 43950, target net updated
episode 43960, target net updated
episode 43970, target net updated
episode 43980, target net updated
episode 43990, target net updated
episode 44000, target net updated
episode 44010, target net updated
episode 44020,

episode 46140, target net updated
episode 46150, target net updated
episode 46160, target net updated
episode 46170, target net updated
episode 46180, target net updated
episode 46190, target net updated
episode 46200, target net updated
episode 46210, target net updated
episode 46220, target net updated
episode 46230, target net updated
episode 46240, target net updated
episode 46250, target net updated
episode 46260, target net updated
episode 46270, target net updated
episode 46280, target net updated
episode 46290, target net updated
episode 46300, target net updated
episode 46310, target net updated
episode 46320, target net updated
episode 46330, target net updated
episode 46340, target net updated
episode 46350, target net updated
episode 46360, target net updated
episode 46370, target net updated
episode 46380, target net updated
episode 46390, target net updated
episode 46400, target net updated
episode 46410, target net updated
episode 46420, target net updated
episode 46430,

episode 48550, target net updated
episode 48560, target net updated
episode 48570, target net updated
episode 48580, target net updated
episode 48590, target net updated
episode 48600, target net updated
episode 48610, target net updated
episode 48620, target net updated
episode 48630, target net updated
episode 48640, target net updated
episode 48650, target net updated
episode 48660, target net updated
episode 48670, target net updated
episode 48680, target net updated
episode 48690, target net updated
episode 48700, target net updated
episode 48710, target net updated
episode 48720, target net updated
episode 48730, target net updated
episode 48740, target net updated
episode 48750, target net updated
episode 48760, target net updated
episode 48770, target net updated
episode 48780, target net updated
episode 48790, target net updated
episode 48800, target net updated
episode 48810, target net updated
episode 48820, target net updated
episode 48830, target net updated
episode 48840,

episode 50960, target net updated
episode 50970, target net updated
episode 50980, target net updated
episode 50990, target net updated
episode 51000, target net updated
episode 51010, target net updated
episode 51020, target net updated
episode 51030, target net updated
episode 51040, target net updated
episode 51050, target net updated
episode 51060, target net updated
episode 51070, target net updated
episode 51080, target net updated
episode 51090, target net updated
episode 51100, target net updated
episode 51110, target net updated
episode 51120, target net updated
episode 51130, target net updated
episode 51140, target net updated
episode 51150, target net updated
episode 51160, target net updated
episode 51170, target net updated
episode 51180, target net updated
episode 51190, target net updated
episode 51200, target net updated
episode 51210, target net updated
episode 51220, target net updated
episode 51230, target net updated
episode 51240, target net updated
episode 51250,

episode 53370, target net updated
episode 53380, target net updated
episode 53390, target net updated
episode 53400, target net updated
episode 53410, target net updated
episode 53420, target net updated
episode 53430, target net updated
episode 53440, target net updated
episode 53450, target net updated
episode 53460, target net updated
episode 53470, target net updated
episode 53480, target net updated
episode 53490, target net updated
episode 53500, target net updated
episode 53510, target net updated
episode 53520, target net updated
episode 53530, target net updated
episode 53540, target net updated
episode 53550, target net updated
episode 53560, target net updated
episode 53570, target net updated
episode 53580, target net updated
episode 53590, target net updated
episode 53600, target net updated
episode 53610, target net updated
episode 53620, target net updated
episode 53630, target net updated
episode 53640, target net updated
episode 53650, target net updated
episode 53660,

episode 55780, target net updated
episode 55790, target net updated
episode 55800, target net updated
episode 55810, target net updated
episode 55820, target net updated
episode 55830, target net updated
episode 55840, target net updated
episode 55850, target net updated
episode 55860, target net updated
episode 55870, target net updated
episode 55880, target net updated
episode 55890, target net updated
episode 55900, target net updated
episode 55910, target net updated
episode 55920, target net updated
episode 55930, target net updated
episode 55940, target net updated
episode 55950, target net updated
episode 55960, target net updated
episode 55970, target net updated
episode 55980, target net updated
episode 55990, target net updated
episode 56000, target net updated
episode 56010, target net updated
episode 56020, target net updated
episode 56030, target net updated
episode 56040, target net updated
episode 56050, target net updated
episode 56060, target net updated
episode 56070,

episode 58190, target net updated
episode 58200, target net updated
episode 58210, target net updated
episode 58220, target net updated
episode 58230, target net updated
episode 58240, target net updated
episode 58250, target net updated
episode 58260, target net updated
episode 58270, target net updated
episode 58280, target net updated
episode 58290, target net updated
episode 58300, target net updated
episode 58310, target net updated
episode 58320, target net updated
episode 58330, target net updated
episode 58340, target net updated
episode 58350, target net updated
episode 58360, target net updated
episode 58370, target net updated
episode 58380, target net updated
episode 58390, target net updated
episode 58400, target net updated
episode 58410, target net updated
episode 58420, target net updated
episode 58430, target net updated
episode 58440, target net updated
episode 58450, target net updated
episode 58460, target net updated
episode 58470, target net updated
episode 58480,

KeyboardInterrupt: 