In [1]:
import math
import datetime
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from itertools import count
from IPython.display import Audio

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
from torchsummary import summary

import cv2

import pyClientRLagentPytorch
import utils

In [2]:
# Compute device: CUDA when a GPU is visible to torch, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Matplotlib setup: record whether an inline (notebook) backend is active and,
# only in that case, pull in IPython's display helpers.
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    from IPython import display

# Switch pyplot to interactive mode.
plt.ion()

In [3]:
#model
class DQN(nn.Module):
    """Convolutional Q-network: three conv + batch-norm stages and a linear head.

    Args:
        h: Height of the input image in pixels.
        w: Width of the input image in pixels.
        input_channels: Number of channels of the input tensor.
        outputs: Number of values the linear head produces per sample.
    """

    def __init__(self, h, w, input_channels, outputs):
        super().__init__()

        # The attribute names double as state_dict keys, so they must stay
        # stable for saved checkpoints to load.
        self.conv1 = nn.Conv2d(input_channels, 16, kernel_size=5, stride=2)
        self.bn1 = nn.BatchNorm2d(16)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=2)
        self.bn2 = nn.BatchNorm2d(32)
        self.conv3 = nn.Conv2d(32, 32, kernel_size=5, stride=2)
        self.bn3 = nn.BatchNorm2d(32)

        def _extent_after_convs(extent, kernel_size=5, stride=2, n_layers=3):
            # Output length of an un-padded convolution, applied once per layer.
            for _ in range(n_layers):
                extent = (extent - kernel_size) // stride + 1
            return extent

        # The width of the linear head depends on the spatial size left after
        # the three convolutions, and therefore on the input image size.
        flat_features = _extent_after_convs(w) * _extent_after_convs(h) * 32
        self.head = nn.Linear(flat_features, outputs)

    def forward(self, x):
        """Map a batch of inputs to one row of ``outputs`` values per sample.

        Called with a single-element batch to choose the next action, or with
        a larger batch.
        """
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        for conv, norm in stages:
            x = F.relu(norm(conv(x)))
        # Flatten everything except the batch dimension for the linear head.
        return self.head(x.view(x.size(0), -1))

# class DQN(nn.Module):
#     def __init__(self, input_channels, outputs):
#         in_channels=input_channels
#         num_actions=outputs
#         super(DQN, self).__init__()
#         self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=8, stride=3)
#         self.bn1 = nn.BatchNorm2d(32)
#         self.conv2 = nn.Conv2d(32, 32, kernel_size=6, stride=2)
#         self.bn2 = nn.BatchNorm2d(32)
#         self.conv3 = nn.Conv2d(32, 64, kernel_size=4, stride=2)
#         self.bn3 = nn.BatchNorm2d(64)
#         self.conv4 = nn.Conv2d(64, 64, kernel_size=3, stride=1)
#         self.bn4 = nn.BatchNorm2d(64)
#         self.fc5 = nn.Linear(6 * 6 * 64, 512)
#         self.fc6 = nn.Linear(512, num_actions)

#     def forward(self, x):
#         x = F.relu(self.bn1(self.conv1(x)))
#         x = F.relu(self.bn2(self.conv2(x)))
#         x = F.relu(self.bn3(self.conv3(x)))
#         x = F.relu(self.bn4(self.conv4(x)))
#         x = F.relu(self.fc5(x.view(x.size(0), -1)))
#         return self.fc6(x) 

In [4]:
# utilities
# 10000-sample sine wave (320 full cycles over the whole array) used as an
# audible "done" notification; it is handed to IPython's Audio(...) with
# rate=10000 in the results cell near the end of the notebook.
wave= np.sin(1.6*np.pi*400*np.arange(10000)/10000)

def make_stack(frame, stack_size):
    """Build the initial frame stack by repeating one frame along a new last axis.

    The stack shape is taken from ``frame`` itself, so the function does not
    rely on any module-level image-size variable.

    Args:
        frame: 2-D array (height, width) holding the first frame.
        stack_size: Number of copies of ``frame`` to stack.

    Returns:
        A new float64 array of shape (height, width, stack_size) in which
        every slice ``[:, :, i]`` equals ``frame``.

    Raises:
        ValueError: If ``frame`` is not 2-D.
    """
    frame = np.asarray(frame)
    if frame.ndim != 2:
        raise ValueError("make_stack expects a 2-D frame, got shape " + str(frame.shape))
    height, width = frame.shape
    stack_of_frames_initial = np.zeros((height, width, stack_size))
    # Broadcast the single frame over the (new) last axis.
    stack_of_frames_initial[:, :, :] = frame[:, :, np.newaxis]
    return stack_of_frames_initial

def update_stack(stack, new_frame, stack_size):
    """Drop the oldest frame, shift the rest down one slot and append new_frame.

    The update happens in place on ``stack``; the same array object is
    returned for convenience.

    Args:
        stack: Array indexed as [row, column, slot].
        new_frame: Frame written into slot ``stack_size - 1``.
        stack_size: Number of slots taking part in the shift.

    Returns:
        ``stack`` itself, after the update.
    """
    newest = stack_size - 1
    if newest > 0:
        # Copy the source slots first so reading and writing never alias.
        stack[:, :, :newest] = stack[:, :, 1:stack_size].copy()
    stack[:, :, newest] = new_frame
    return stack

def select_action(state):
    """Return the greedy action of the module-level ``test_net`` for ``state``.

    Args:
        state: Input tensor for ``test_net``; the ``view(1, 1)`` below means
            it must contain exactly one sample.

    Returns:
        A tensor of shape (1, 1) holding the index of the largest network
        output along dimension 1.
    """
    # Pure inference: disable gradient tracking so no autograd graph is built
    # (and kept alive) for every environment step.
    with torch.no_grad():
        return test_net(state).max(1)[1].view(1, 1)

def state_to_phosphenes(raw_state, phosphenes_input, sigma, threshold_high, threshold_low, phosphene_resolution, simulator):
    """Turn a raw environment state into the intermediate images and the network input frame.

    Pipeline: raw state -> image array -> Gaussian blur -> Canny edges ->
    phosphene simulation. The network frame is either the phosphene image or
    the grayscale camera image, divided by 255.

    Args:
        raw_state: State as returned by the environment; converted with the
            module-level ``environment.state2usableArray``.
        phosphenes_input: If true, the phosphene image is used as the frame;
            otherwise the grayscale version of the image array is used.
        sigma: Standard deviation handed to ``cv2.GaussianBlur``.
        threshold_high: Upper threshold handed to ``cv2.Canny``.
        threshold_low: Lower threshold handed to ``cv2.Canny``.
        phosphene_resolution: Not used by this function; kept for interface
            compatibility.
        simulator: Callable invoked as ``simulator(activation_mask=canny)``.

    Returns:
        Tuple ``(image_array, blurred, canny, phosphenes, frame)``. ``frame``
        is always a new array, so the returned ``phosphenes`` image is left
        exactly as the simulator produced it.
    """
    image_array = environment.state2usableArray(raw_state)
    ksize = 11  # fixed blur kernel size (alternative: np.round(4*sigma)+1)
    blurred = cv2.GaussianBlur(image_array, (ksize, ksize), sigma)
    canny = cv2.Canny(blurred, threshold_low, threshold_high)
    phosphenes = simulator(activation_mask=canny)
    if phosphenes_input:
        # Out-of-place division: an in-place `/=` here would also rescale the
        # `phosphenes` array that is returned alongside the frame, and would
        # fail outright for integer-typed simulator output.
        frame = phosphenes / 255.0
    else:
        gray = cv2.cvtColor(image_array, cv2.COLOR_BGR2GRAY)
        frame = gray.astype('float32') / 255.0
    return image_array, blurred, canny, phosphenes, frame

def record_episode(result, image_array, blurred, canny, phosphenes, end, fps, SIGMA, threshold_high, threshold_low, phosphene_resolution, frame_size, box_bumps, wall_bumps_r, loops_r, steps_taken, last_loop, ep):
    """Compose one 2x2 mosaic video frame and write it to ``result``.

    Layout: top-left the camera image with the running counters, top-right
    the blurred image with the sigma value, bottom-left the Canny edges,
    bottom-right the phosphene image. The threshold and phosphene-resolution
    captions are drawn over the bottom row.

    Args:
        result: Video writer object; only its ``write`` method is used.
        image_array, blurred: Three-channel images (converted BGR -> RGB here).
        canny: Single-channel edge image.
        phosphenes: Single-channel phosphene image; rescaled to 0..254.
        end: Episode status flag; the value 2 marks the final frame.
        fps: Frames per second, used to size the hold at the very end.
        SIGMA, threshold_high, threshold_low, phosphene_resolution: Values
            shown in the captions.
        frame_size: Size of the output frame.
        box_bumps, wall_bumps_r, loops_r, steps_taken: Counters shown in the
            overlay of the camera image.
        last_loop: True during the last test episode.
        ep: Not used by this function.

    NOTE(review): the canvas is allocated as (frame_size[0], frame_size[1])
    and every tile is sized from frame_size[0] only, so a square frame_size
    appears to be assumed -- confirm before using a non-square size.
    """
    outframe = np.zeros((frame_size[0],frame_size[1],3)).astype('uint8')
    individual_size = int(frame_size[0]/2)

    # Top-left tile: camera image with the running statistics.
    image_array = cv2.cvtColor(image_array, cv2.COLOR_BGR2RGB)
    image_array = cv2.resize(image_array, (individual_size, individual_size))
    overlay_lines = (
        (40, "Boxes hit: " + str(box_bumps)),
        (70, "Wall bumps: " + str(wall_bumps_r)),
        (100, "Loops: " + str(loops_r)),
        (130, "Steps taken: " + str(steps_taken)),
    )
    for text_y, caption in overlay_lines:
        image_array = cv2.putText(image_array, caption, (20, text_y), 2, 1, (255, 255, 255), 1)

    # Top-right tile: blurred image with the sigma caption.
    blurred = cv2.cvtColor(blurred, cv2.COLOR_BGR2RGB)
    blurred = cv2.resize(blurred,(individual_size, individual_size))
    blurred = cv2.putText(blurred, ("Sigma: " + str(SIGMA)),(20,individual_size - 40), 2, 1, (255, 255, 255), 1)

    # Bottom-left tile: edge image.
    canny = cv2.resize(canny, (individual_size, individual_size))

    # Bottom-right tile: phosphene image scaled so its maximum maps to 254.
    # An all-zero image (no phosphene active) would otherwise divide by zero
    # and push NaNs through the uint8 cast; render it as a black tile instead.
    peak = phosphenes.max()
    if peak == 0:
        phos = np.zeros(phosphenes.shape, dtype='uint8')
    else:
        phos = (254*phosphenes/peak).astype('uint8')
    phos = cv2.resize(phos, (individual_size, individual_size))

    # Copy the four tiles into the canvas, channel by channel (the two
    # single-channel tiles are replicated into all three channels).
    for c in range(3):
        outframe[:individual_size,:individual_size,c] = image_array[:,:,c]
        outframe[:individual_size,individual_size:frame_size[0],c] = blurred[:,:,c]
        outframe[individual_size:,:individual_size,c] = canny
        outframe[individual_size:,individual_size:frame_size[0],c] = phos

    outframe = cv2.putText(outframe, ("T_high: " + str(threshold_high)+", T_low: "+ str(threshold_low)),(20,individual_size + 40), 2, 1, (255, 255, 255), 1)
    outframe = cv2.putText(outframe, ("Phosphene res: (" + str(phosphene_resolution[0])+","+ str(phosphene_resolution[1])+ ")"),(individual_size + 20,individual_size + 40), 2, 1, (255, 255, 255), 1)
    result.write(outframe)

    # On the last frame of the last episode, repeat the frame for about four
    # seconds of video so the final statistics stay readable.
    if end == 2 and last_loop:
        for _ in range(int(fps *4)):
            result.write(outframe)
    
def get_current_date():
    """Return the current local date and time as 'YYYY-MM-DD_HH-MM'."""
    return f"{datetime.datetime.now():%Y-%m-%d_%H-%M}"

def get_model_name(v_b, v_l_w, v_t, i_e):
    """Assemble a timestamped checkpoint file name ending in '.pth'.

    Besides the arguments, the name embeds the module-level ``PHOSPHENES``
    flag and the first entry of ``phosphene_resolution``.

    Args:
        v_b: Value written after 'BoxBumps'.
        v_l_w: Value written after 'WallBumpsLoops'.
        v_t: Value written after 'Steps'.
        i_e: Value written after 'Ep'.

    Returns:
        The file name as a string.
    """
    pieces = (
        get_current_date(),
        "_Phos", PHOSPHENES,
        "_Res", phosphene_resolution[0],
        "_Ep", i_e,
        "_BoxBumps", v_b,
        "_WallBumpsLoops", v_l_w,
        "_Steps", v_t,
        ".pth",
    )
    model_name_string = "".join(str(piece) for piece in pieces)
    return model_name_string


In [5]:
#hyperparameters
PHOSPHENES = True #use the phosphene representation as input or not
SIGMA = 1.2  # sigma passed to the Gaussian blur in state_to_phosphenes
threshold_high = 50  # upper Canny threshold
threshold_low = int(0.5 * threshold_high)  # lower Canny threshold
phosphene_resolution= (26,26)
RECORD_EPISODES = False #does not work with 128 size yet
fps = 8

size       = 128        # screenshot size
screen_height = screen_width = size

# Size of the recorded video frames.
frame_width = 1024
frame_height = 1024
frame_size = (frame_width, frame_height)

if RECORD_EPISODES:
    result = cv2.VideoWriter("./Models/" + 'RLAGENTPhosphenesExperimentFrameStacking.avi',
                         cv2.VideoWriter_fourcc(*'MJPG'),
                         fps, frame_size)

# Number of consecutive frames stacked into one network input; the stack
# axis becomes the channel axis of the network.
stack_size = 4

input_channels = stack_size

n_actions = 3

# Checkpoint to evaluate.
FOLDER = "2021-02-26_16-30_PhosTrue_Res26_FrameStacking_atari"
MODELNAME = "2021-02-26_17-45_PhosTrue_Res26_Ep320_BoxBumps8_WallBumps7_Loops2_Steps1790_TotalErrors17_Improved"
PATH = "./Models/" + FOLDER +"/"+ MODELNAME +".pth"
test_net = DQN(size, size, input_channels, n_actions).to(device)
# map_location keeps loading consistent with the CPU fallback used for
# `device`: a checkpoint saved from a GPU would otherwise fail to load on a
# machine without CUDA.
test_net.load_state_dict(torch.load(PATH, map_location=device))
test_net.eval()
summary(test_net, (input_channels, size, size))

steps_done = 0

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 62, 62]           1,616
       BatchNorm2d-2           [-1, 16, 62, 62]              32
            Conv2d-3           [-1, 32, 29, 29]          12,832
       BatchNorm2d-4           [-1, 32, 29, 29]              64
            Conv2d-5           [-1, 32, 13, 13]          25,632
       BatchNorm2d-6           [-1, 32, 13, 13]              64
            Linear-7                    [-1, 3]          16,227
Total params: 56,467
Trainable params: 56,467
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.25
Forward/backward pass size (MB): 1.43
Params size (MB): 0.22
Estimated Total Size (MB): 1.90
----------------------------------------------------------------


In [6]:
# Unity environment
# Connection settings for the TCP/IP interface of the simulator.
ip = "127.0.0.1"   # address the interface listens on
port = 13000       # port the interface listens on
timescale = 1      # update step scale for unity

screen_height = size
screen_width = size

environment = pyClientRLagentPytorch.Environment(
    ip=ip,
    port=port,
    size=size,
    timescale=timescale,
)

In [7]:
# Evaluation run: play `test_episodes` test episodes with the loaded network
# and accumulate collision, loop, step and reward statistics over all of them.
test_episodes = 5

random_np_seed_test = 32423
last_loop = False  # becomes True during the final episode (only when recording)
# Counters accumulated over all episodes.
box_bump_count = 0
wall_bump_count = 0
loop_count = 0
total_steps_taken = 0
total_reward = 0

environment.reset() # This reset ensures the test seed will be reset to its first value the next time environment.reset_test() is used
np.random.seed(random_np_seed_test)#seed used for the generation of the phosphene simulators for the test session
for ep in range(test_episodes):
    wall_piece_hit = False
    # A fresh simulator is created for every episode.
    simulator = utils.phosphene_simulator(phosphene_resolution, (screen_height, screen_width)) 
    print("Loop: "+str(ep + 1))
    steps_not_moved_forward = 0
    end, reward, state_raw = environment.reset_test() #resets the test environment and the first time also sets the random seed
    original, blurred, canny, phosphene_frame, frame = state_to_phosphenes(state_raw, PHOSPHENES, SIGMA, threshold_high, threshold_low, phosphene_resolution, simulator)
    
    # Initial network input: the first frame repeated stack_size times, with
    # the stack axis moved to the front and a leading batch axis added.
    stack_of_frames = make_stack(frame, stack_size)
    state = stack_of_frames.transpose((2, 0, 1))
    state = torch.from_numpy(state).unsqueeze(0).to(device, dtype=torch.float)
    
    for t in count():
        # Select and perform an action
        action = select_action(state)
        move = action.item()

        # Track consecutive steps on which the chosen move was not 0
        # (presumably 0 is "move forward" -- confirm against the environment).
        if move == 0:
            steps_not_moved_forward = 0
        else:
            steps_not_moved_forward += 1

        # After 11 such steps in a row, count a "loop" and override the
        # network's choice with move 0.
        if steps_not_moved_forward >= 11:
            loop_count += 1
            move = 0
            steps_not_moved_forward = 0

        end, reward, next_state_raw = environment.step_test(move)

        # Build the next network input from the new observation.
        original, blurred, canny, phosphene_frame, next_frame = state_to_phosphenes(next_state_raw, PHOSPHENES, SIGMA, threshold_high, threshold_low, phosphene_resolution, simulator)
        stack_of_frames = update_stack(stack_of_frames, next_frame, stack_size)
        state = stack_of_frames.transpose((2, 0, 1))
        state = torch.from_numpy(state).unsqueeze(0).to(device, dtype=torch.float)
                
        if RECORD_EPISODES:
            if ep == test_episodes -1:
                last_loop = True
            record_episode(result, original, blurred, canny, phosphene_frame, end, fps, SIGMA, threshold_high, threshold_low, phosphene_resolution, frame_size, box_bump_count, wall_bump_count, loop_count, t + total_steps_taken, last_loop, ep)

        # A reward of exactly 200 is counted as a box bump, exactly 150 as a
        # wall bump. A run of consecutive 150-rewards counts as a single wall
        # bump because wall_piece_hit stays set until another reward arrives.
        if reward == 200:
            box_bump_count +=1
        if reward == 150:
            if wall_piece_hit == False:
                wall_bump_count += 1
                wall_piece_hit = True
        else:
            wall_piece_hit = False
        
        # Rewards above 100 enter the total as -(reward - 100), e.g.
        # 200 -> -100 and 150 -> -50; all other rewards are added unchanged.
        if reward > 100:
            total_reward += -(reward -100)
        else:
            total_reward += reward
        
        # end == 2 terminates the episode.
        # NOTE(review): t is the zero-based index of the final step, so each
        # episode adds one less than the number of steps actually executed --
        # confirm this is the intended step count.
        if end == 2:
            total_steps_taken += t
            break

# Finalise the video file when recording was enabled.
if RECORD_EPISODES:
    result.release()
    cv2.destroyAllWindows()

Loop: 1
Loop: 2
Loop: 3
Loop: 4
Loop: 5


In [8]:
# Per-episode averages of the counters accumulated during the test run.
(
    box_bump_average,
    wall_bump_average,
    loop_average,
    average_steps_taken,
    average_reward,
) = (
    total / test_episodes
    for total in (box_bump_count, wall_bump_count, loop_count, total_steps_taken, total_reward)
)

# One text line per statistic; the trailing '\n' leaves a blank line after
# the report.
report_lines = [
    "Average box bumps: " + str(box_bump_average),
    "Average wall bumps: " + str(wall_bump_average),
    "Average loops: " + str(loop_average),
    "Average steps taken: " + str(average_steps_taken),
    "Average reward: " + str(average_reward),
]
print("\n".join(report_lines) + '\n')

#play sound when done
Audio(wave, rate=10000, autoplay=True)

Average box bumps: 6.2
Average wall bumps: 3.8
Average loops: 4.2
Average steps taken: 737.6
Average reward: 25681.0



In [9]:
complex_hallway = 0 # 0 is false, 1 is true

# The phosphene resolution is stored as 0 when phosphene input was disabled.
phos_res = phosphene_resolution[0] if PHOSPHENES else 0

# Order of the exported values: resolution, hallway flag, then the five
# per-episode averages.
numpy_array = np.array([
    phos_res,
    complex_hallway,
    box_bump_average,
    wall_bump_average,
    loop_average,
    average_steps_taken,
    average_reward,
])
np.set_printoptions(suppress=True)
print(numpy_array)

# NOTE(review): numpy_array is 1-D, so np.savetxt writes one value per line
# and the delimiter is never used -- confirm this layout is what downstream
# readers of the .csv expect.
PATH_stats = f"./Stats/{MODELNAME}.csv"
np.savetxt(PATH_stats, numpy_array, delimiter=',')

[   26.      0.      6.2     3.8     4.2   737.6 25681. ]
