In [1]:
import math
import datetime
import os, sys
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple
from itertools import count
from IPython.display import Audio
import csv

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
from torchsummary import summary

import cv2




# local files
sys.path.insert(0, '../')
import pyClient
import utils
import model
from model import Transition
import training

In [2]:
# ---------------------------------------------------------------------------
# Environment
# ---------------------------------------------------------------------------
IMSIZE     = 128          # handed to the environment as `size` and to the agent as `imsize`
STACK_SIZE = 4            # handed to the agent as `in_channels` and to cfg['stack_size']
N_ACTIONS  = 3            # handed to the agent as `n_actions`
IP         = "127.0.0.1"  # IP address that the TCP/IP interface listens to
PORT       = 13000        # port number that the TCP/IP interface listens to

# Client for the simulator (a DummyEnvironment() can be substituted for debugging)
environment = pyClient.Environment(ip=IP, port=PORT, size=IMSIZE)

# ---------------------------------------------------------------------------
# Agent / replay memory
# ---------------------------------------------------------------------------
BATCH_SIZE        = 128    # original 128
GAMMA             = 0.1    # discount factor (0.999 was used previously)
EPS_START         = 0.95   # initial epsilon
EPS_END           = 0.05   # final epsilon
EPS_DECAY_steps   = 4000   # divisor used to derive the per-step epsilon decrement
EPS_DECAY         = (EPS_START - EPS_END) / EPS_DECAY_steps
REPLAY_START_SIZE = 1500
TARGET_UPDATE     = 10     # episodes
DEVICE            = 'cuda:0'
MEMORY_CAPACITY   = 12000

# ---------------------------------------------------------------------------
# Optimizer
# ---------------------------------------------------------------------------
LR_DQN = 0.0001            # 0.01

# The agent (model.DoubleDQNAgent) and its Adam optimizer are not created here:
# the training cell builds a fresh pair from these constants for every condition.

In [3]:
# Image processing
class ImageProcessor(object):
    """Turn a raw environment state into a normalised single-frame tensor.

    The processor optionally passes the grayscale frame through a
    ``utils.PhospheneSimulator`` before normalising it.

    Note: ``__call__`` reads the notebook-level global ``environment`` to
    decode the raw state, so an environment must exist before the processor
    is called.
    """

    def __init__(self, phosphene_resolution=None, imsize=128):
        """Set up the (optional) phosphene simulator.

        @TODO
        - Extended image processing

        Args:
            phosphene_resolution: ``None`` to disable phosphene simulation,
                otherwise the number of phosphenes per side; it is passed to
                the simulator as ``(phosphene_resolution, phosphene_resolution)``.
            imsize: side length of the square frame. ``__call__`` reshapes its
                output to ``(1, 1, imsize, imsize)``.
        """
        self.imsize = imsize
        if phosphene_resolution is not None:
            # `size` follows `imsize` instead of a hard-coded (128, 128), so the
            # simulator and the final reshape in __call__ agree for any imsize.
            # The default imsize=128 reproduces the previous behaviour exactly.
            self.simulator = utils.PhospheneSimulator(phosphene_resolution=(phosphene_resolution,phosphene_resolution),
                                                     size=(imsize,imsize),
                                                     jitter=0.25,
                                                     intensity_var=0.9,
                                                     aperture=.66,
                                                     sigma=0.60,)
        else:
            self.simulator = None

    def __call__(self,state_raw,):
        """Process one raw state.

        Args:
            state_raw: raw state as returned by the environment.

        Returns:
            ``torch.Tensor`` viewed as ``(1, 1, imsize, imsize)``, holding the
            (optionally phosphene-simulated) grayscale frame divided by 255.
        """
        # Decode with the global environment, then BGR -> single-channel gray.
        frame = environment.state2usableArray(state_raw)
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        frame = frame.astype('float32')
        if self.simulator is not None:
            frame = self.simulator(frame)

        # Presumably rescales 8-bit intensities to [0, 1] — confirm the
        # simulator's output range if phosphene simulation is enabled.
        return torch.Tensor(frame / 255.).view(1,1,self.imsize, self.imsize)



In [None]:
## Training parameters 
MAX_EPISODES = 600 # number of episodes (an episode ends after agent hits a box)
MAX_STEPS  = 5e4  # number of optimization steps (each time step the model parameters are updated)
RESET_UPON_END_SIGNAL = {0:False,  # Nothing happened
                         1:True,   # Box collision
                         2:False,   # Wall collision
                         3:True}  # Reached step target
RESET_AFTER_NR_SIDESTEPS = 5


# Training configuration dictionary.
# 'seed', 'training_condition', 'model_path' and 'logfile' are condition-specific
# and are filled in inside the sweep below.
cfg = dict()
cfg['max_episodes']             = MAX_EPISODES
cfg['max_steps']                = MAX_STEPS
cfg['target_update']            = TARGET_UPDATE
cfg['stack_size']               = STACK_SIZE
cfg['reset_after_nr_sidesteps'] = RESET_AFTER_NR_SIDESTEPS
cfg['reset_upon_end_signal']    = RESET_UPON_END_SIGNAL
cfg['replay_start_size']        = REPLAY_START_SIZE

# Full experiment grid (currently disabled):
# for seed in [0,1,2,3,4]:
#     for complexity in ['plain', 'complex']:
#         for phosphene_resolution in [None, 50,42,34,26,18,10]:

seed = 0
phosphene_resolution = None

# Make sure the output directory exists before starting a long sweep, so a
# missing folder cannot make the first log/model write fail.
os.makedirs('Out/Exp1', exist_ok=True)

# Hyper-parameter sweep over discount factor and learning rate.
# Lower-case loop variables are used so the sweep does not overwrite the
# GAMMA / LR_DQN constants defined in the configuration cell.
for complexity in ['plain', 'complex']:
    for gamma in [0.1, 0.2, 0.5, 0.9, 0.99, 0.999]:
        for lr_dqn in [0.00005, 0.0001, 0.0002, 0.0006, 0.002]:

            # Condition-specific configuration
            cfg['training_condition'] = 0 if complexity == 'plain' else 1 # 0: plain training, 1: complex training, 2: plain testing 3: complex testing
            cfg['seed'] = seed

            # Seed every RNG this notebook imports, not only torch, so that each
            # condition starts from the same random state.
            random.seed(seed)
            np.random.seed(seed)
            torch.manual_seed(seed)

            # File-system friendly run name: '.', '+' and ',' are stripped
            # (e.g. gamma 0.1 -> 'gamma01', lr 5e-05 -> 'LR5e-05').
            model_name = 'exp1_{}_phos{}_S{}_LR{}_gamma{}'.format(complexity,phosphene_resolution,seed,lr_dqn,gamma)
            for unwanted in ('.', '+', ','):
                model_name = model_name.replace(unwanted, '')
            print(model_name)
            cfg['logfile'] = 'Out/Exp1/{}_train_stats.csv'.format(model_name)
            cfg['model_path'] = 'Out/Exp1/{}_best_model.pth'.format(model_name)

            # Phosphene simulation
            img_processing = ImageProcessor(phosphene_resolution = phosphene_resolution)

            # Re-initialize model and optimizer so no weights, replay memory or
            # optimizer state leak from one condition into the next
            agent = model.DoubleDQNAgent(imsize=IMSIZE,
                 in_channels=STACK_SIZE,
                 n_actions=N_ACTIONS,
                 memory_capacity=MEMORY_CAPACITY,
                 eps_start=EPS_START,
                 eps_end=EPS_END,
                 eps_delta=EPS_DECAY,
                 gamma_discount = gamma,
                 batch_size = BATCH_SIZE,
                 device=DEVICE)

            optimizer = optim.Adam(agent.policy_net.parameters(), lr = lr_dqn)

            # Start training
            training.train(agent, environment, img_processing, optimizer, cfg)
            print('finished training')

exp1_plain_phosNone_S0_LR5e-05_gamma01
step count 25 wall_collisions: 26, box_collisions: 0, endless_loops: 5, total_reward: -26.400000000000002
episode 0, target net updated
episode 10, target net updated
episode 20, target net updated
episode 30, target net updated
episode 40, target net updated
step count 25 wall_collisions: 25, box_collisions: 0, endless_loops: 5, total_reward: -25.5
episode 50, target net updated
episode 60, target net updated
episode 70, target net updated
episode 80, target net updated
episode 90, target net updated
step count 478 wall_collisions: 0, box_collisions: 51, endless_loops: 0, total_reward: 326.7
new best model
episode 100, target net updated
episode 110, target net updated
episode 120, target net updated
episode 130, target net updated
episode 140, target net updated
step count 510 wall_collisions: 0, box_collisions: 30, endless_loops: 0, total_reward: 388.6999999999999
new best model
episode 150, target net updated
episode 160, target net updated
ep

episode 310, target net updated
episode 320, target net updated
episode 330, target net updated
episode 340, target net updated
step count 511 wall_collisions: 0, box_collisions: 2, endless_loops: 1, total_reward: 441.7999999999995
new best model
episode 350, target net updated
episode 360, target net updated
episode 370, target net updated
episode 380, target net updated
episode 390, target net updated
step count 506 wall_collisions: 5, box_collisions: 4, endless_loops: 1, total_reward: 433.99999999999966
episode 400, target net updated
episode 410, target net updated
episode 420, target net updated
episode 430, target net updated
episode 440, target net updated
step count 511 wall_collisions: 0, box_collisions: 1, endless_loops: 1, total_reward: 444.7999999999993
new best model
episode 450, target net updated
episode 460, target net updated
episode 470, target net updated
episode 480, target net updated
episode 490, target net updated
step count 505 wall_collisions: 0, box_collisions

episode 10, target net updated
episode 20, target net updated
episode 30, target net updated
episode 40, target net updated
step count 25 wall_collisions: 25, box_collisions: 0, endless_loops: 5, total_reward: -25.5
episode 50, target net updated
episode 60, target net updated
episode 70, target net updated
episode 80, target net updated
episode 90, target net updated
step count 482 wall_collisions: 0, box_collisions: 51, endless_loops: 0, total_reward: 326.29999999999995
new best model
episode 100, target net updated
episode 110, target net updated
episode 120, target net updated
episode 130, target net updated
episode 140, target net updated
step count 504 wall_collisions: 0, box_collisions: 35, endless_loops: 0, total_reward: 372.0999999999998
new best model
episode 150, target net updated
episode 160, target net updated
episode 170, target net updated
episode 180, target net updated
episode 190, target net updated
step count 457 wall_collisions: 5, box_collisions: 23, endless_loops

episode 310, target net updated
episode 320, target net updated
episode 330, target net updated
episode 340, target net updated
step count 521 wall_collisions: 0, box_collisions: 1, endless_loops: 0, total_reward: 472.39999999999975
new best model
episode 350, target net updated
episode 360, target net updated
episode 370, target net updated
episode 380, target net updated
episode 390, target net updated
step count 527 wall_collisions: 0, box_collisions: 0, endless_loops: 0, total_reward: 474.7999999999996
new best model
episode 400, target net updated
episode 410, target net updated
episode 420, target net updated
episode 430, target net updated
episode 440, target net updated
step count 519 wall_collisions: 0, box_collisions: 0, endless_loops: 0, total_reward: 475.5999999999997
new best model
episode 450, target net updated
episode 460, target net updated
episode 470, target net updated
episode 480, target net updated
episode 490, target net updated
step count 527 wall_collisions: 0,

episode 10, target net updated
episode 20, target net updated
episode 30, target net updated
episode 40, target net updated
step count 25 wall_collisions: 25, box_collisions: 0, endless_loops: 5, total_reward: -25.5
episode 50, target net updated
episode 60, target net updated
episode 70, target net updated
episode 80, target net updated
episode 90, target net updated
step count 480 wall_collisions: 0, box_collisions: 49, endless_loops: 0, total_reward: 332.5
new best model
episode 100, target net updated
episode 110, target net updated
episode 120, target net updated
episode 130, target net updated
episode 140, target net updated
step count 524 wall_collisions: 0, box_collisions: 34, endless_loops: 0, total_reward: 374.1999999999998
new best model
episode 150, target net updated
episode 160, target net updated
episode 170, target net updated
episode 180, target net updated
episode 190, target net updated
step count 549 wall_collisions: 7, box_collisions: 29, endless_loops: 0, total_re

episode 310, target net updated
episode 320, target net updated
episode 330, target net updated
episode 340, target net updated
step count 527 wall_collisions: 0, box_collisions: 0, endless_loops: 0, total_reward: 474.7999999999996
new best model
episode 350, target net updated
episode 360, target net updated
episode 370, target net updated
episode 380, target net updated
episode 390, target net updated
step count 519 wall_collisions: 0, box_collisions: 0, endless_loops: 0, total_reward: 475.5999999999997
new best model
episode 400, target net updated
episode 410, target net updated
episode 420, target net updated
episode 430, target net updated
episode 440, target net updated
step count 521 wall_collisions: 0, box_collisions: 0, endless_loops: 0, total_reward: 475.39999999999964
episode 450, target net updated
episode 460, target net updated
episode 470, target net updated
episode 480, target net updated
episode 490, target net updated
step count 525 wall_collisions: 0, box_collisions

episode 10, target net updated
episode 20, target net updated
episode 30, target net updated
episode 40, target net updated
step count 25 wall_collisions: 25, box_collisions: 0, endless_loops: 5, total_reward: -25.5
episode 50, target net updated
episode 60, target net updated
episode 70, target net updated
episode 80, target net updated
episode 90, target net updated
step count 481 wall_collisions: 0, box_collisions: 52, endless_loops: 0, total_reward: 323.4
new best model
episode 100, target net updated
episode 110, target net updated
episode 120, target net updated
episode 130, target net updated
episode 140, target net updated
step count 384 wall_collisions: 9, box_collisions: 25, endless_loops: 2, total_reward: 250.9000000000002
episode 150, target net updated
episode 160, target net updated
episode 170, target net updated
episode 180, target net updated
episode 190, target net updated
step count 592 wall_collisions: 37, box_collisions: 26, endless_loops: 1, total_reward: 334.9999

step count 609 wall_collisions: 20, box_collisions: 9, endless_loops: 0, total_reward: 425.9999999999992
new best model
episode 350, target net updated
episode 360, target net updated
episode 370, target net updated
episode 380, target net updated
episode 390, target net updated
step count 586 wall_collisions: 30, box_collisions: 7, endless_loops: 1, total_reward: 412.0999999999997
episode 400, target net updated
episode 410, target net updated
episode 420, target net updated
episode 430, target net updated
episode 440, target net updated
step count 511 wall_collisions: 18, box_collisions: 2, endless_loops: 2, total_reward: 379.3999999999996
episode 450, target net updated
episode 460, target net updated
episode 470, target net updated
episode 480, target net updated
episode 490, target net updated
step count 507 wall_collisions: 16, box_collisions: 2, endless_loops: 1, total_reward: 402.4999999999998
episode 500, target net updated
episode 510, target net updated
episode 520, target n

episode 60, target net updated
episode 70, target net updated
episode 80, target net updated
episode 90, target net updated
step count 481 wall_collisions: 0, box_collisions: 50, endless_loops: 0, total_reward: 329.4
new best model
episode 100, target net updated
episode 110, target net updated
episode 120, target net updated
episode 130, target net updated
episode 140, target net updated
step count 452 wall_collisions: 6, box_collisions: 39, endless_loops: 1, total_reward: 297.20000000000005
episode 150, target net updated
episode 160, target net updated
episode 170, target net updated
episode 180, target net updated
episode 190, target net updated
step count 473 wall_collisions: 38, box_collisions: 27, endless_loops: 2, total_reward: 241.80000000000018
episode 200, target net updated
episode 210, target net updated
episode 220, target net updated
episode 230, target net updated
episode 240, target net updated
step count 408 wall_collisions: 41, box_collisions: 12, endless_loops: 4, t

episode 410, target net updated
episode 420, target net updated
episode 430, target net updated
episode 440, target net updated
step count 171 wall_collisions: 35, box_collisions: 3, endless_loops: 5, total_reward: 27.69999999999999
episode 450, target net updated
episode 460, target net updated
episode 470, target net updated
episode 480, target net updated
episode 490, target net updated
step count 143 wall_collisions: 34, box_collisions: 0, endless_loops: 5, total_reward: 34.899999999999956
episode 500, target net updated
episode 510, target net updated
episode 520, target net updated
episode 530, target net updated
episode 540, target net updated
step count 264 wall_collisions: 49, box_collisions: 8, endless_loops: 5, total_reward: 73.3
episode 550, target net updated
episode 560, target net updated
episode 570, target net updated
episode 580, target net updated
episode 590, target net updated
finished training
exp1_plain_phosNone_S0_LR00006_gamma099
step count 25 wall_collisions: 

step count 519 wall_collisions: 2, box_collisions: 38, endless_loops: 0, total_reward: 359.79999999999967
new best model
episode 150, target net updated
episode 160, target net updated
episode 170, target net updated
episode 180, target net updated
episode 190, target net updated
step count 423 wall_collisions: 38, box_collisions: 19, endless_loops: 3, total_reward: 233.40000000000018
episode 200, target net updated
episode 210, target net updated
episode 220, target net updated
episode 230, target net updated
episode 240, target net updated
step count 396 wall_collisions: 39, box_collisions: 14, endless_loops: 4, total_reward: 133.60000000000053
episode 250, target net updated
episode 260, target net updated
episode 270, target net updated
episode 280, target net updated
episode 290, target net updated
step count 641 wall_collisions: 70, box_collisions: 23, endless_loops: 3, total_reward: 204.90000000000066
episode 300, target net updated
episode 310, target net updated
episode 320, t

episode 460, target net updated
episode 470, target net updated
episode 480, target net updated
episode 490, target net updated
step count 228 wall_collisions: 42, box_collisions: 5, endless_loops: 5, total_reward: 66.90000000000002
episode 500, target net updated
episode 510, target net updated
episode 520, target net updated
episode 530, target net updated
episode 540, target net updated
step count 238 wall_collisions: 38, box_collisions: 2, endless_loops: 5, total_reward: 97.20000000000007
episode 550, target net updated
episode 560, target net updated
episode 570, target net updated
episode 580, target net updated
episode 590, target net updated
finished training
exp1_plain_phosNone_S0_LR00006_gamma0999
step count 25 wall_collisions: 25, box_collisions: 0, endless_loops: 5, total_reward: -25.5
episode 0, target net updated
episode 10, target net updated
episode 20, target net updated
episode 30, target net updated
episode 40, target net updated
step count 25 wall_collisions: 25, bo

step count 492 wall_collisions: 0, box_collisions: 32, endless_loops: 0, total_reward: 382.29999999999984
new best model
episode 200, target net updated
episode 210, target net updated
episode 220, target net updated
episode 230, target net updated
episode 240, target net updated
step count 489 wall_collisions: 8, box_collisions: 24, endless_loops: 1, total_reward: 356.49999999999966
episode 250, target net updated
episode 260, target net updated
episode 270, target net updated
episode 280, target net updated
episode 290, target net updated
step count 491 wall_collisions: 5, box_collisions: 22, endless_loops: 1, total_reward: 363.8999999999995
episode 300, target net updated
episode 310, target net updated
episode 320, target net updated
episode 330, target net updated
episode 340, target net updated
step count 446 wall_collisions: 5, box_collisions: 13, endless_loops: 2, total_reward: 329.39999999999975
episode 350, target net updated
episode 360, target net updated
episode 370, targe

episode 510, target net updated
episode 520, target net updated
episode 530, target net updated
episode 540, target net updated
step count 449 wall_collisions: 0, box_collisions: 1, endless_loops: 1, total_reward: 398.1999999999998
episode 550, target net updated
episode 560, target net updated
episode 570, target net updated
episode 580, target net updated
episode 590, target net updated
finished training
exp1_complex_phosNone_S0_LR00006_gamma01
step count 25 wall_collisions: 25, box_collisions: 0, endless_loops: 5, total_reward: -25.5
episode 0, target net updated
episode 10, target net updated
episode 20, target net updated
episode 30, target net updated
episode 40, target net updated
step count 25 wall_collisions: 25, box_collisions: 0, endless_loops: 5, total_reward: -25.5
episode 50, target net updated
episode 60, target net updated
episode 70, target net updated
episode 80, target net updated
episode 90, target net updated
step count 476 wall_collisions: 0, box_collisions: 50, e

episode 210, target net updated
episode 220, target net updated
episode 230, target net updated
episode 240, target net updated
step count 520 wall_collisions: 1, box_collisions: 24, endless_loops: 0, total_reward: 404.7999999999996
new best model
episode 250, target net updated


## Debugging:

#### Visualize replay memory 

In [None]:
# EXAMPLES = 10

# i = 0
# bad_choices = [(state, action, next_state,reward) for (state, action, next_state,reward) in agent.memory.memory]# if reward<0 and action ==0]
# # for state, action, next_state, reward in agent.memory.memory[:EXAMPLES]:
# for state, action, next_state, reward in bad_choices[:EXAMPLES]:
#     i+=1
    
#     plt.figure(figsize = (10,10), dpi=200)
#     img = torch.cat([state[0,t,...] for t in range(STACK_SIZE)],dim=1)
#     if next_state is not None:
#         img = torch.cat([img, next_state[0,-1,...]],dim=1)
#         plt.axvline(x=STACK_SIZE*IMSIZE,color='r')
#     plt.imshow(img.detach().cpu().numpy())
#     plt.title('Action: {}, Reward {}'.format(action.item(),reward.item()))
#     plt.axis('off')
#     plt.ylabel('frames >')
#     plt.xlabel('state | next state')
#     plt.show()
    
# # plt.tight_layout()


#### Inspecting q-value predictions

In [None]:
# state_action_values, expected_state_action_values = agent.forward()
# transitions = agent.memory.sample(agent.batch_size)
# # Transpose the batch (see https://stackoverflow.com/a/19343/3343043 for
# # detailed explanation). This converts batch-array of Transitions
# # to Transition of batch-arrays.
# batch = Transition(*zip(*transitions))

# # Compute a mask of non-final states and concatenate the batch elements
# # (a final state would've been the one after which simulation ended)
# non_final_mask = torch.tensor(tuple(map(lambda s: s is not None,
#                                       batch.next_state)), device=agent.device, dtype=torch.bool)
# non_final_next_states = torch.cat([s for s in batch.next_state
#                                             if s is not None])
# state_batch = torch.cat(batch.state)
# action_batch = torch.cat(batch.action)
# reward_batch = torch.cat(batch.reward)

# # Compute Q(s_t, a) - the model computes Q(s_t), then we select the
# # columns of actions taken. These are the actions which would've been taken
# # for each batch state according to policy_net
# pred = agent.policy_net(state_batch)

# EXAMPLES = 50

# actions = action_batch[:EXAMPLES].detach().cpu().numpy()
# rewards = reward_batch[:EXAMPLES].detach().cpu().numpy()
# predicted = pred[:EXAMPLES].detach().cpu().numpy().squeeze()
# obtained = np.zeros((EXAMPLES,3))
# for i in range(EXAMPLES):
#     obtained[i,actions[i]] = rewards[i]

# plt.figure(figsize=(10,10),dpi=100)
# plt.subplot(1,2,1)
# plt.imshow(predicted)
# plt.subplot(1,2,2)
# plt.imshow(obtained)
# plt.show()

#### Visualize an episode

In [None]:
# Prepare a single episode for inspection.
# Relies on `environment`, `cfg`, `img_processing` and `agent` left behind by
# an earlier cell; cfg['training_condition'] is whatever that cell set last.

# Reset environment at start of episode.
# Draw a random integer seed in [0, 250); the three return values of
# setRandomSeed() and reset() are not needed here and are discarded.
seed = torch.randint(250,(1,)).item()
_, _, _ = environment.setRandomSeed(seed)
_, _, _ = environment.reset(cfg['training_condition'])

# Create an empty frame stack and fill it with frames:
# step the environment with action 0 once per stack slot, keeping only the raw
# frame (third return value). After the loop, `state` holds the value returned
# by the last frame_stack.update_with() call.
frame_stack = utils.FrameStack(stack_size=cfg['stack_size'] )
for _ in range(cfg['stack_size'] ):
    _, _, frame_raw = environment.step(0)
    frame = img_processing(frame_raw).to(agent.device) 
    state = frame_stack.update_with(frame)

In [None]:
# for t in count(): 

#     agent.policy_net.eval()
#     # Agent performs a step (based on the current state) and obtains next state
#     action = agent.select_action(state)
#     end, reward, frame_raw = environment.step(action.item())
#     agent_died = cfg['reset_upon_end_signal'][end] # or side_steps > cfg['reset_after_nr_sidesteps']
#     frame = img_processing(frame_raw).to(agent.device)
#     next_state = frame_stack.update_with(frame) if not agent_died else None
#     pred = agent.policy_net(state).argmax(axis=1)
    
#     # Interpret reward signal
#     if reward > 100:
#         reward = -(reward -100)
#     reward /= 10
    
#     # Visualize state and print pred, action and reward
#     plt.figure(figsize = (10,10), dpi=200)
#     img = torch.cat([state[0,t,...] for t in range(STACK_SIZE)],dim=1)
#     if next_state is not None:
#         img = torch.cat([img, next_state[0,-1,...]],dim=1)
#         plt.axvline(x=STACK_SIZE*IMSIZE,color='r')
#     plt.imshow(img.detach().cpu().numpy())
#     plt.title('Action: {}, Reward {}'.format(action.item(),reward))
#     plt.axis('off')
#     plt.ylabel('frames >')
#     plt.xlabel('state | next state')
#     plt.show()
#     print('Pred: {}'.format(pred.item()))
#     print('Action: {}'.format(action.item()))
#     print('Reward: {}\n'.format(reward))

#     # the episode ends here if agent performed any 'lethal' action (specified in RESET_UPON_END_SIGNAL)
#     if agent_died:
#         break
#     else:
#         state = next_state


#### Toy environment for debugging

In [None]:
# class DummyEnvironment:
#     def __init__(self, ip = "127.0.0.1", port = 13000, size = 128, channels=16):
#         self.size = size
#         self.channels = channels
#         self._maxSteps = 100
#         self.reset()
 
#     def reset(self, kind=0):
#         self._state = 1
#         self._steps = 0
#         return self._receive()

#     def step(self, action):
#         self._state = -self._state
#         self._steps += 1
#         return self._receive(action)
    
#     def setRandomSeed(self, action):
#         return self._receive()

#     def _receive(self,action=0):
#         end    = 0 if self._steps < self._maxSteps else 3
#         reward = {0:-self._state, 1:self._state, 2:-2}[action]
#         state  = [150-100*self._state for _ in range(262144)] # raw state
#         return end, reward, state
    
#     def state2arrays(self,state):
#         if self.channels == 3:
#             return {'colors' : self.state2usableArray(state),}
        
#         else:
#             state  = np.array(state, "uint8").reshape(self.size, self.size, self.channels)
#             arrays = {'colors' : state[...,:3],
#                     'objseg' : state[...,3:6],
#                     'semseg': state[...,6:9],
#                     'normals'   : state[...,9:12],
#                     'flow'   : state[...,12:15],
#                     'depth'  : state[...,15]}
#             return arrays
    
#     def state2usableArray(self, state):
#         return np.array(state, "uint8").reshape(self.size, self.size, 16)[...,:3]

## Validation:

#### Fast evaluation of trained model

In [None]:
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Run whose training log is inspected.
# NOTE(review): the training cell writes logs named
# 'exp1_<complexity>_phos<res>_S<seed>_LR<lr>_gamma<gamma>_train_stats.csv';
# confirm that a log with this shorter name actually exists on disk.
MODEL_NAME = 'exp1_plain_phosNone_S0'

stats = pd.read_csv('./Out/Exp1/{}_train_stats.csv'.format(MODEL_NAME))

# Split once: rows are flagged by the `validation` column (0 = training, 1 = validation)
train_rows = stats.loc[stats.validation==0]
val_rows = stats.loc[stats.validation==1]

# Reward per episode: training (raw + 10-episode moving average) and validation
plt.figure(figsize = (16,4),dpi=200)
plt.subplot(1,2,1)
sns.lineplot(data= train_rows,x='episode', y='reward')
plt.plot(np.convolve(train_rows.reward,np.ones(10)/10,mode='valid'))
plt.title('training performance')
plt.legend(['training_reward', 'running average (N=10)'])
plt.subplot(1,2,2)
sns.lineplot(data= val_rows,x='episode', y='reward')
plt.title('validation performance')
plt.legend(['validation_reward'])
plt.show()

# Training loss per episode
plt.figure(figsize = (16,4))
sns.lineplot(data= train_rows,x='episode', y='train_loss')
plt.title('training curves')
plt.legend(['Train Loss'])
plt.show()

# Epsilon per episode. Title and legend previously repeated the loss figure's
# labels ('training curves' / 'Train Loss'), mislabelling this plot.
plt.figure(figsize = (16,4))
sns.lineplot(data= train_rows,x='episode', y='epsilon')
plt.title('exploration rate')
plt.legend(['Epsilon'])
plt.show()

### Further testing and evaluation:

In [None]:
import torch
import cv2
import pandas as pd
import os, sys
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np 

# # local files
sys.path.insert(0, '../')
import pyClient
import utils
import model
from model import Transition
from testing import test

#### Run models on test environment:

In [None]:
# Image processing
class ImageProcessor(object):
    """Turn a raw environment state into a normalised single-frame tensor.

    The processor optionally passes the grayscale frame through a
    ``utils.PhospheneSimulator`` before normalising it.

    Note: ``__call__`` reads the notebook-level global ``environment`` to
    decode the raw state, so an environment must exist before the processor
    is called.
    """

    def __init__(self, phosphene_resolution=None, imsize=128):
        """Set up the (optional) phosphene simulator.

        @TODO
        - Extended image processing

        Args:
            phosphene_resolution: ``None`` to disable phosphene simulation,
                otherwise the number of phosphenes per side; it is passed to
                the simulator as ``(phosphene_resolution, phosphene_resolution)``.
            imsize: side length of the square frame. ``__call__`` reshapes its
                output to ``(1, 1, imsize, imsize)``.
        """
        self.imsize = imsize
        if phosphene_resolution is not None:
            # `size` follows `imsize` instead of a hard-coded (128, 128), so the
            # simulator and the final reshape in __call__ agree for any imsize.
            # The default imsize=128 reproduces the previous behaviour exactly.
            self.simulator = utils.PhospheneSimulator(phosphene_resolution=(phosphene_resolution,phosphene_resolution),
                                                     size=(imsize,imsize),
                                                     jitter=0.25,
                                                     intensity_var=0.9,
                                                     aperture=.66,
                                                     sigma=0.60,)
        else:
            self.simulator = None

    def __call__(self,state_raw,):
        """Process one raw state.

        Args:
            state_raw: raw state as returned by the environment.

        Returns:
            ``torch.Tensor`` viewed as ``(1, 1, imsize, imsize)``, holding the
            (optionally phosphene-simulated) grayscale frame divided by 255.
        """
        # Decode with the global environment, then BGR -> single-channel gray.
        frame = environment.state2usableArray(state_raw)
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        frame = frame.astype('float32')
        if self.simulator is not None:
            frame = self.simulator(frame)

        # Presumably rescales 8-bit intensities to [0, 1] — confirm the
        # simulator's output range if phosphene simulation is enabled.
        return torch.Tensor(frame / 255.).view(1,1,self.imsize, self.imsize)
    
# Default processor *instance* (no phosphene simulation). Binding the class
# itself here would make `img_processing(raw_state)` construct a new
# ImageProcessor instead of processing the frame. The test loop below replaces
# this with a condition-specific instance for every model.
img_processing = ImageProcessor()

In [None]:
# Environment parameters
IMSIZE = 128
STACK_SIZE = 4
N_ACTIONS = 3
IP  = "127.0.0.1" # Ip address that the TCP/IP interface listens to
PORT = 13000       # Port number that the TCP/IP interface listens to

environment = pyClient.Environment(ip = IP, port = PORT, size = IMSIZE) 

# Model parameters
BATCH_SIZE = 128 #original 128
DEVICE = 'cuda:0'

# No agent is built here: the loop below creates a fresh agent for every
# condition, so an extra instance up front would only allocate a model that is
# immediately thrown away.


## Testing parameters 
RESET_UPON_END_SIGNAL = {0:False,  # Nothing happened
                         1:False,   # Box collision
                         2:False,   # Wall collision
                         3:True}  # Reached step target
RESET_AFTER_NR_SIDESTEPS = 5


# Testing configuration dictionary
cfg = dict()
cfg['stack_size']               = STACK_SIZE
cfg['reset_after_nr_sidesteps'] = RESET_AFTER_NR_SIDESTEPS
cfg['reset_upon_end_signal']    = RESET_UPON_END_SIGNAL

# One result row per (complexity, phosphene resolution, seed) condition
test_data = []
for complexity in ['plain', 'complex']:
    for phosphene_resolution in [None, 50,42,34,26,18,10]:
        for seed in [0,1,2,3,4]:
            
            # Condition-specific configuration
            cfg['training_condition']       = 2 if complexity == 'plain' else 3 # 0: plain training, 1: complex training, 2: plain testing 3: complex testing
            model_name = 'exp1_{}_phos{}_S{}'.format(complexity, phosphene_resolution,seed)
            model_path = 'Out/Exp1/{}_best_model.pth'.format(model_name)
            print(model_name)

            # Phosphene simulation
            img_processing = ImageProcessor(phosphene_resolution = phosphene_resolution)

            # Re-initialize the agent and load the best checkpoint of this condition
            agent = model.DoubleDQNAgent(imsize=IMSIZE,
                 in_channels=STACK_SIZE,
                 n_actions=N_ACTIONS,
                 device=DEVICE)
            agent.policy_net.load_state_dict(torch.load(model_path,map_location=DEVICE))
            # Switch to inference mode so any dropout / batch-norm layers behave
            # deterministically; harmless if test() already does this itself.
            agent.policy_net.eval()

            # Testing: merge the condition labels with the metrics returned by test()
            conditions = {'complexity': complexity, 'phosphene_resolution': phosphene_resolution, 'seed' : seed}
            results = test(agent, environment, img_processing, cfg)
            test_data.append({**conditions,**results})

# Persist all conditions in a single table
data = pd.DataFrame(test_data)
data.to_csv('Out/Exp1/_test_results.csv', index=False)

In [None]:
# Read the saved test results back from disk and display them
results_csv = './Out/Exp1/_test_results.csv'
test_data = pd.read_csv(results_csv)
test_data

In [None]:
# One figure per test metric, as a function of phosphene resolution and split
# by environment complexity. Every figure, including the last, is shown
# explicitly so the cell does not end on a bare Axes repr.
# NOTE(review): rows with phosphene_resolution=None are presumably read back
# from the CSV as NaN and would then be missing from the x-axis — confirm.
for metric in ['box_collisions', 'wall_collisions', 'cumulative_reward']:
    sns.lineplot(data=test_data, x='phosphene_resolution', y=metric, hue='complexity')
    plt.show()

#### Training and validation curves

In [None]:
# Gather the training logs of every condition.
# Frames are collected in lists and concatenated once at the end:
# DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop copies all previous rows on every iteration.
all_frames = []   # every row of every training log
best_rows = []    # per model: the single validation row with the highest reward
for complexity in ['plain', 'complex']:
    for phosphene_resolution in [None, 50,42,34,26,18,10]:
        for seed in [0,1,2,3,4]:
            model_name = 'exp1_{}_phos{}_S{}'.format(complexity, phosphene_resolution,seed)
            train_csv = './Out/Exp1/{}_train_stats.csv'.format(model_name)
            train_stats = pd.read_csv(train_csv)

            # Tag each row with the condition it belongs to
            train_stats['complexity'] = complexity
            train_stats['resolution'] = phosphene_resolution
            train_stats['seed'] = seed
            train_stats['model_name'] = model_name

            # collect all rows
            all_frames.append(train_stats)

            # collect only the best-performing validation row; reset_index()
            # makes idxmax() return a position usable with iloc (it also keeps
            # the original row label in an 'index' column, as before)
            val = train_stats.loc[train_stats.validation==1].reset_index()
            best_rows.append(val.iloc[[val.reward.idxmax()]])

all_stats = pd.concat(all_frames, ignore_index=True)
best_stats = pd.concat(best_rows, ignore_index=True)


In [None]:
# Model whose learning curves are shown
MODEL_NAME = 'exp1_plain_phos18_S2'

# Rows of the selected model, split by the `validation` flag
stats = all_stats.loc[all_stats.model_name==MODEL_NAME]
training_rows = stats.loc[stats.validation==0]
validation_rows = stats.loc[stats.validation==1]

# Uniform 10-sample kernel for the running average of the training reward
smoothing_kernel = np.ones(10)/10

plt.figure(figsize = (16,4))

# Left panel: training reward, raw and smoothed
plt.subplot(1,2,1)
sns.lineplot(data=training_rows, x='episode', y='reward')
plt.plot(np.convolve(training_rows.reward, smoothing_kernel, mode='valid'))
plt.title('training performance')
plt.legend(['training_reward', 'running average (N=10)'])

# Right panel: validation reward
plt.subplot(1,2,2)
sns.lineplot(data=validation_rows, x='episode', y='reward')
plt.title('validation performance')
plt.legend(['validation_reward'])

plt.show()


In [None]:
# One figure per metric of the best validation row of each model,
# plotted against phosphene resolution and split by complexity
BEST_STAT_METRICS = ['reward', 'step_count', 'wall_collisions', 'box_collisions', 'endless_loops']
for metric in BEST_STAT_METRICS:
    sns.lineplot(data=best_stats, x='resolution', y=metric, hue='complexity')
    plt.show()