In [2]:
import tensorflow as tf
import keras
from keras.models import Sequential, Model, load_model
from keras.layers import Dense, Dropout, Conv3D, MaxPooling3D, Activation, Flatten, concatenate, Input
from keras.callbacks import TensorBoard
from keras.utils.vis_utils import plot_model
from tensorflow.keras.optimizers import Adam

from collections import deque
import numpy as np
import matplotlib.pyplot as plt
import random
import os
import time
from datetime import datetime
import math
from tqdm import tqdm
from PIL import Image

import modifiedTB as tb
from agent import Point

# --- Training-era hyper-parameters (not referenced by the cells visible in this notebook) ---
DISCOUNT = 0.95
MIN_REWARD = -1.00

# --- Environment geometry ---
# Edge length of the cubic voxel grid; every map is allocated as (SIZE, SIZE, SIZE).
SIZE = 20
# Number of buildings placed by generate_terrain().
N_BUILDINGS = 50
# Number of drifting obstacles created by reset().
N_OBSTACLES = 10
# Exclusive upper bound for the random building height.
# NOTE: computed once from the SIZE above; it is not refreshed if SIZE is reassigned later.
MAX_HEIGHT = SIZE - 2

# --- Reward shaping ---
# Base penalty applied on every non-terminal move.
MOVE_PENALTY = 0.01
# Not referenced by the cells visible in this notebook.
COLLISION_PENALTY = 0.25
# Reward granted on the step a drone reaches its goal.
GOAL_REWARD = 1.00
# Number of discrete actions ranked/tried per move.
ACTION_SPACE_SIZE = 6
# Terrain value at ground level; decreases linearly to 0 at height SIZE // 2
# (only applied when BINARY_ENV is False).
GROUND_PROX_PENALTY = 0.2

# --- Traffic generation ---
# Horizon over which departure times are sampled; episodes are cut off at TOTAL_TIME + 100 steps.
TOTAL_TIME = 1000
# Peak departure rate used by generate_dep_times().
MAX_LAMBDA = 0.1
# Number of hubs created by HASEnv.generate_terrain().
N_HUBS = 2

# Half-width of the cubic observation window; the window holds (1 + 2 * OBSERVATION_DIST) ** 3 cells.
OBSERVATION_DIST = 1  # Change this: {1,2,3}
# When True, the move penalty is scaled by (1 + 2 * terrain value at the drone's cell).
HETERO_REWARD = True  # Change this: {T(only when BINARY_ENV==F),F}
# When True, generate_terrain() skips the ground gradient and the 0.5 halo around buildings.
BINARY_ENV = False  # Change this: {T,F}

In [7]:
class DQNAgent:
    """Inference-time wrapper around a saved Keras Q-network.

    Attributes:
        terrain_size: Number of cells in the cubic observation window,
            ``(1 + 2 * OBSERVATION_DIST) ** 3``. Callers use it to split a flat
            state vector into the terrain part and the remaining part.
        model: The model returned by ``keras.models.load_model``.
        replay_memory: Deque of transitions appended by
            ``update_replay_memory``.
    """

    def __init__(self, filepath, replay_memory_size=None):
        """Load the network stored at ``filepath``.

        Args:
            filepath: Path handed to ``keras.models.load_model``.
            replay_memory_size: Optional maximum number of stored transitions;
                ``None`` (the default) keeps the memory unbounded.
        """
        # Read OBSERVATION_DIST at construction time, so it must already hold
        # the value the loaded model was trained with.
        self.terrain_size = (1 + (2 * OBSERVATION_DIST)) ** 3
        # Load architecture and weights from model file
        self.model = keras.models.load_model(filepath)
        # Previously this attribute was never created, so the first call to
        # update_replay_memory() raised AttributeError.
        self.replay_memory = deque(maxlen=replay_memory_size)

    def update_replay_memory(self, transition):
        """Append one transition to the replay memory."""
        self.replay_memory.append(transition)

    def get_qs(self, state):
        """Return the first row of ``self.model.predict(state)``.

        In this notebook ``state`` is a two-element list: a batch holding the
        flattened terrain window and a batch holding the rest of the state.
        """
        with tf.device('/GPU:0'):
            return self.model.predict(state)[0]

In [8]:
class P2PEnv:
    """Point-to-point multi-drone voxel world driven by a pre-trained DQN.

    All state lives in attributes created by ``reset``; the class has no
    ``__init__``. The grid is a ``(SIZE, SIZE, SIZE)`` cube described by four
    maps: ``terrain_map`` (1 = solid building, smaller values = soft penalty
    zones), ``obstacles_map``, ``drones_map`` and ``goals_map`` (1 = occupied).

    The environment reads the notebook-level configuration constants
    (``SIZE``, ``N_BUILDINGS``, ``OBSERVATION_DIST``, ...) at call time.
    """

    def reset(self, model):
        """Build a fresh world and load the policy network.

        Args:
            model: Path of the saved model, forwarded to ``DQNAgent``.
        """
        self.terrain_map = self.generate_terrain()
        self.dep_times = self.generate_dep_times(TOTAL_TIME, MAX_LAMBDA)
        self.DQNagent = DQNAgent(model)

        free_cells = self.empty_blocks(self.terrain_map)
        self.agents = []
        self.drones_map = np.zeros((SIZE, SIZE, SIZE))
        self.goals_map = np.zeros((SIZE, SIZE, SIZE))

        # Dynamic obstacles start on random cells that are not solid terrain.
        self.dynamic_obs = [Point(*next(free_cells)) for _ in range(N_OBSTACLES)]
        self.obstacles_map = np.zeros((SIZE, SIZE, SIZE))
        for obs in self.dynamic_obs:
            self.obstacles_map[tuple(obs.location())] = 1

        # Keep track of the drone's trajectory
        self.paths = []

        # Keep track of the drone's rewards, and whether it is done
        self.rewards = [0] * len(self.dep_times)
        self.dones = [False] * len(self.dep_times)

        self.episode_step = 0

    def step(self):
        """Advance the simulation by one time step.

        Spawns the drones scheduled for this step, moves every unfinished
        drone by one action, then drifts every dynamic obstacle.

        Returns:
            True when all scheduled drones had been spawned before this step
            and none of them still had to move, or when ``episode_step`` has
            reached ``TOTAL_TIME + 100``; False otherwise.
        """
        self.episode_step += 1
        all_done = len(self.agents) == len(self.dep_times)

        departures = self.dep_times.count(self.episode_step)
        if departures:
            occupied = ((self.terrain_map == 1) | (self.obstacles_map == 1)
                        | (self.drones_map == 1) | (self.goals_map == 1))
            free_cells = self.empty_blocks(occupied)

            for _ in range(departures):
                start = Point(*next(free_cells))
                end = Point(*next(free_cells))
                # Only accept start/goal pairs whose L1 distance is exactly SIZE.
                while sum(abs(start.location() - end.location())) != SIZE:
                    start = Point(*next(free_cells))
                    end = Point(*next(free_cells))
                self.agents.append([start, end])
                self.paths.append([start.location()])
                self.drones_map[tuple(start.location())] = 1
                self.goals_map[tuple(end.location())] = 1

        terrain = self.terrain_map == 1
        obstacles = self.obstacles_map == 1

        for idx, agent in enumerate(self.agents):
            if self.dones[idx]:
                continue

            all_done = False

            # Drones that already moved this step are seen at their new cell.
            blocked = terrain | obstacles | (self.drones_map == 1)
            action = self._choose_action(agent, blocked)

            self.drones_map[tuple(agent[0].location())] = 0
            agent[0].action(action).within_bounds(SIZE, SIZE, SIZE)
            self.drones_map[tuple(agent[0].location())] = 1

            self.paths[idx].append(agent[0].location())

            if agent[0] == agent[1]:
                reward = GOAL_REWARD
                done = True
            else:
                reward = (-MOVE_PENALTY) * (1.0 + (HETERO_REWARD * 2.0 * self.terrain_map[tuple(agent[0].location())]))
                done = False

            self.dones[idx] = done
            self.rewards[idx] += reward

            if done:
                # A finished drone and its goal disappear from the maps.
                self.drones_map[tuple(agent[0].location())] = 0
                self.goals_map[tuple(agent[1].location())] = 0

        fixed = terrain | (self.drones_map == 1) | (self.goals_map == 1)

        for obs in self.dynamic_obs:
            # Obstacles moved earlier in this loop are seen at their new cell.
            blocked = fixed | (self.obstacles_map == 1)
            # NOTE(review): when the drifted cell is in bounds but occupied the
            # heading is NOT re-drawn; if Point.drift() is deterministic this
            # loop cannot terminate - confirm against agent.Point.
            while True:
                n = obs.copy()
                n.drift_heading = obs.drift_heading
                if n.drift().within_bounds(SIZE, SIZE, SIZE):
                    if not blocked[tuple(n.location())]:
                        break
                else:
                    obs.drift_heading = 2 * np.random.rand(3) - 1
                    continue
            self.obstacles_map[tuple(obs.location())] = 0
            obs.x, obs.y, obs.z = n.x, n.y, n.z
            self.obstacles_map[tuple(obs.location())] = 1

        if self.episode_step >= TOTAL_TIME + 100:
            all_done = True

        return all_done

    def _choose_action(self, agent, blocked):
        """Pick the action for one drone.

        With probability 0.95 actions are tried from highest to lowest
        Q-value, otherwise in random order. The first action whose target
        cell is not ``blocked`` wins; if every target is blocked the last
        action tried is returned.

        Args:
            agent: ``[position, goal]`` pair of points.
            blocked: Boolean grid of cells the drone must not enter.
        """
        current_state = self.generate_state(agent)
        if np.random.rand() < 0.95:
            split = self.DQNagent.terrain_size
            # Query the network once per move; it used to be re-evaluated for
            # every candidate action although the state does not change.
            q_values = self.DQNagent.get_qs([np.array([current_state[:split]]),
                                            np.array([current_state[split:]])])
            ranking = np.argsort(q_values)
            candidates = (ranking[-i - 1] for i in range(ACTION_SPACE_SIZE))
        else:
            candidates = np.random.permutation(ACTION_SPACE_SIZE)

        for action in candidates:
            n = agent[0].copy().action(action)
            n.within_bounds(SIZE, SIZE, SIZE)
            if not blocked[tuple(n.location())]:
                break
        return action

    def render(self, elev=60, azim=45, save=""):
        """Draw the current world as a 3-D voxel plot.

        Args:
            elev: Elevation passed to ``view_init``.
            azim: Azimuth passed to ``view_init``.
            save: If non-empty, path the figure is saved to before showing.
        """
        fig = plt.figure(figsize=(10, 10))
        ax = fig.add_subplot(projection='3d')
        ax.set_xlabel("x")
        ax.set_ylabel("y")
        ax.set_zlabel("z")
        ax.view_init(elev=elev, azim=azim)
        ax.grid(True)

        terrain = self.terrain_map == 1
        drones = self.drones_map == 1
        goals = self.goals_map == 1
        obstacles = self.obstacles_map == 1

        voxelarr = terrain | drones | goals | obstacles
        # Later assignments win where categories overlap.
        colors = np.empty(terrain.shape, dtype=object)
        colors[terrain] = '#7A88CCC0'
        colors[drones] = '#FFD65DC0'
        colors[goals] = '#607D3BC0'
        colors[obstacles] = '#FDA4BAC0'
        ax.voxels(voxelarr, facecolors=colors, shade=True)

        if save != "":
            plt.savefig(save)
        plt.show()
        # render() is called once per step; release the figure explicitly so
        # figures do not pile up under backends that keep them open.
        plt.close(fig)

    def generate_terrain(self):
        """Create the terrain grid.

        Returns:
            ``(SIZE, SIZE, SIZE)`` float array: 1 inside buildings and, unless
            ``BINARY_ENV`` is set, a ground-proximity gradient plus a 0.5 halo
            around each building.
        """
        terrain = np.zeros((SIZE, SIZE, SIZE))

        if GROUND_PROX_PENALTY and not BINARY_ENV:
            half = SIZE // 2
            for i in range(half):
                terrain[:, :, i] = GROUND_PROX_PENALTY * (half - i) / half

        for _ in range(N_BUILDINGS):
            while True:
                # Generate random numbers in intervals of 0.5
                x, y = np.random.randint(0, SIZE*2, 2) / 2
                # Generate random building height
                z = np.random.randint(0, MAX_HEIGHT)
                # Re-draw until the footprint does not hit an existing building.
                if self._stamp_building(terrain, x, y, z):
                    break
        return terrain

    def _stamp_building(self, terrain, x, y, z):
        """Try to place one building of height ``z`` at ``(x, y)`` in ``terrain``.

        Returns:
            False (terrain untouched) if the footprint overlaps a solid cell,
            True after the building has been written.
        """
        x0, x1 = math.floor(x), math.ceil(x) + 1
        y0, y1 = math.floor(y), math.ceil(y) + 1

        if not np.all(terrain[x0:x1, y0:y1, 0:z] != 1):
            return False

        if not BINARY_ENV:
            # Clamp the halo start to 0: a start of -1 selects from the far
            # end of the axis and produced an empty slice, so buildings on
            # the grid edge silently got no halo.
            hx0, hy0 = max(x0 - 1, 0), max(y0 - 1, 0)
            terrain[hx0:x1 + 1, hy0:y1 + 1, 0:z + 1] = \
                terrain[hx0:x1 + 1, hy0:y1 + 1, 0:z + 1].clip(min=0.5)
        terrain[x0:x1, y0:y1, 0:z].fill(1)
        return True

    def generate_state(self, agent):
        """Return the flat observation for one drone.

        The observation is the cubic window of side ``1 + 2 * OBSERVATION_DIST``
        around the drone, taken from the element-wise maximum of terrain and
        obstacle maps (cells outside the grid read as 1), followed by
        ``agent[0].vector(agent[1])``.
        """
        x, y, z = tuple(agent[0].location())
        side = 1 + 2 * OBSERVATION_DIST
        # Padding by OBSERVATION_DIST shifts indices so that [x:x+side] is
        # centred on the drone.
        padded_terrain = np.pad(np.maximum(self.terrain_map, self.obstacles_map),
                                OBSERVATION_DIST, 'constant', constant_values=1)
        window = padded_terrain[x:x + side, y:y + side, z:z + side].flatten()
        return np.append(window, agent[0].vector(agent[1]))

    def empty_blocks(self, occupied):
        """Return an iterator over all cells whose value is not 1, shuffled.

        Args:
            occupied: Grid indexed as ``[x, y, z]``; only the first ``SIZE``
                entries along each axis are considered.
        """
        empty_blocks = np.argwhere(occupied[:SIZE, :SIZE, :SIZE] != 1).tolist()
        random.shuffle(empty_blocks)
        return iter(empty_blocks)

    def generate_dep_times(self, T, max_lambda):
        """Sample integer departure times in ``[0, T]``.

        Candidate times are generated with exponential gaps of mean
        ``1 / max_lambda`` and each is kept with probability
        ``1 - (t - T/2)^2 / (T^2/4)``.

        Note: ``step`` increments ``episode_step`` before matching, so a
        departure time of 0 is never dispatched.

        Returns:
            List of floored departure times in increasing order (duplicates
            possible).
        """
        # Rate function = max_lambda * (1 - (t - T/2)^2/(T^2/4))
        dep_times = []
        t = 0
        while True:
            t += np.random.exponential(1/max_lambda)
            if t > T:
                break
            if np.random.rand() <= (1 - (t - T/2)**2/(T**2/4)):
                dep_times.append(t)

        return [math.floor(dep_time) for dep_time in dep_times]

In [9]:
# Instantiate the environment. P2PEnv defines no __init__, so the object holds
# no state until reset(model_path) is called.
env = P2PEnv()

In [None]:
import csv

# Id of the first model in the trained series (the commented list below starts
# at 13). The original code derived OBSERVATION_DIST from the position in the
# *full* model list, cycling 1, 2, 3.
FIRST_MODEL_ID = 13

# render(save=...) writes into this folder; create it up front so savefig
# does not fail on a fresh checkout.
os.makedirs("visualisations", exist_ok=True)

# Each test is (MODEL_NAME, SIZE, N_BUILDINGS, N_OBSTACLES, MAX_LAMBDA).
for test in [("25",20,50,10,0.1),
         ("26",20,50,10,0.2),
         ("27",20,50,10,0.5),
         ("28",20,50,25,0.1),
         ("29",20,50,25,0.2),
         ("30",20,50,25,0.5),
         ("31",20,100,10,0.1),
         ("32",20,100,10,0.2),
         ("33",20,100,10,0.5)]:

    # These are notebook-level names that the environment reads at call time.
    MODEL_NAME, SIZE, N_BUILDINGS, N_OBSTACLES, MAX_LAMBDA = test
    # MAX_HEIGHT was derived from SIZE in the config cell; keep it in sync.
    MAX_HEIGHT = SIZE - 2

    for model in [
#                     "13_10600____0.97max____0.87avg____0.12min.model",
#                  "14_14600____0.99max____0.80avg___-0.43min.model",
#                  "15_07200____0.99max____0.87avg____0.74min.model",
#                  "16_02600____0.97max____0.73avg___-1.06min.model",
#                  "17_03200____0.97max____0.65avg___-0.48min.model",
#                  "18_14000____0.98max____0.84avg___-0.26min.model",
                 "19_04400____0.96max____0.79avg___-0.30min.model"
#                  "20_10000____1.00max____0.72avg___-0.29min.model",
#                  "21_22400____0.98max____0.64avg___-0.43min.model",
#                  "22_01200____0.58max___-0.58avg___-1.30min.model",
#                  "23_11200____0.88max___-0.00avg___-1.00min.model",
#                  "24_10400____0.94max____0.21avg___-1.06min.model"
    ]:

        # Derive the observation distance from the model id instead of the
        # enumerate index, so it stays correct whichever subset of models is
        # uncommented (index-based lookup was only right by coincidence).
        OBSERVATION_DIST = ((int(model[:2]) - FIRST_MODEL_ID) % 3) + 1

        env = P2PEnv()

        step = 1

        env.reset("models/"+model)

        env.render(save=f"visualisations/{MODEL_NAME}_{step}.png")

        # One frame per simulation step until the environment reports completion.
        while not env.step():
            step += 1
            env.render(save=f"visualisations/{MODEL_NAME}_{step}.png")
#             if step >0 and step %100 == 0:
#                 print(".", end="")

#         rewards = env.rewards
#         lengths = [len(p) for p in env.paths]

#         with open(f'Test_{model[:2]}-Model_{MODEL_NAME}.csv', 'w', newline='') as file:
#             mywriter = csv.writer(file, delimiter=',')
#             for j in range(len(rewards)):
#                 mywriter.writerow((rewards[j], lengths[j]))

In [59]:
import csv
import statistics

# Cost-model parameters. They never change, so they are defined once instead
# of being re-assigned on every loop iteration.
# NOTE(review): units are presumably those hinted at by the names
# (_M metres, _S seconds, _HR per hour) - confirm.
ELECTRIC_COST = 0.172625
VOR_HR = 1.308
DELTA_X_M = 2
DELTA_T_S = 0.2
VELOCITY = DELTA_X_M/DELTA_T_S
# Only paths shorter than this many entries enter the statistics.
# NOTE(review): presumably this filters out drones that never arrived - confirm.
LENGTH_CUTOFF = 101

# Distinct names for the two file handles: the inner `with` used to rebind the
# outer handle's name.
with open(f'temp.csv', 'w', newline='') as summary_file:
    mywriter = csv.writer(summary_file, delimiter=',')

    # `test_id` instead of MODEL_NAME: the loop used to overwrite the
    # notebook-level MODEL_NAME setting with an int.
    for test_id in range(25,31):
        print(test_id)
        store = []
        average = [[0,0,0,0],[0,0,0,0],[0,0,0,0]]
        for agent in range(13,25):

            rewards = []
            lengths = []
            # Each row of a result file is (reward, path length).
            with open(f'Test_{agent}-Model_{test_id}.csv') as results_file:
                for row in csv.reader(results_file):
                    rewards.append(float(row[0]))
                    lengths.append(float(row[1]))

            kept_lengths = [l for l in lengths if l < LENGTH_CUTOFF]
            avg_distance = statistics.mean(kept_lengths) * DELTA_X_M
            standard_dev = statistics.stdev(kept_lengths) * DELTA_X_M

            # Tangible part: cost proportional to the mean distance.
            avg_cost_t = ELECTRIC_COST / 1000 * (avg_distance)
            # Total: tangible part plus a term proportional to the travel-time spread.
            avg_cost = avg_cost_t + standard_dev / VELOCITY * (VOR_HR / 3600)
            print(f"Average Cost = {avg_cost}, Percentage Tangible = {avg_cost_t/avg_cost}")

            # Keep the first 7 characters of the textual value for the table.
            store.append(str(avg_cost)[:7])

        # Arrange the 12 per-agent values as 3 rows x 4 columns: consecutive
        # agents go down a column.
        for x in range(4):
            for y in range(3):
                average[y][x] = store[x*3+y]

        for z in average:
            mywriter.writerow(z)

        

25
Average Cost = 0.011809616974969602, Percentage Tangible = 0.8939647747529545
Average Cost = 0.014004681367296375, Percentage Tangible = 0.8864617664436013
Average Cost = 0.012610210454715031, Percentage Tangible = 0.9120276327593051
Average Cost = 0.012435746195430038, Percentage Tangible = 0.8987496506745097
Average Cost = 0.015573461023109053, Percentage Tangible = 0.8910432416012914
Average Cost = 0.014165214235651173, Percentage Tangible = 0.8996848228948593
Average Cost = 0.011722559997229374, Percentage Tangible = 0.9164027772944262
Average Cost = 0.01485814666409296, Percentage Tangible = 0.9030693224414397
Average Cost = 0.01217137774101886, Percentage Tangible = 0.8957598565201309
Average Cost = 0.018877568781755904, Percentage Tangible = 0.8778672821478948
Average Cost = 0.012298665699818386, Percentage Tangible = 0.9120184854916348
Average Cost = 0.00932256739899901, Percentage Tangible = 0.9411690678586694
26
Average Cost = 0.012316518612653192, Percentage Tangible = 0.

In [None]:
class HASEnv:
    """Hub-based multi-drone voxel world driven by a pre-trained DQN.

    Drones depart from a fixed set of hubs on the ground layer. All state
    lives in attributes created by ``reset``; the class has no ``__init__``.
    The grid is a ``(SIZE, SIZE, SIZE)`` cube described by four maps:
    ``terrain_map`` (1 = solid building or hub, smaller values = soft penalty
    zones), ``obstacles_map``, ``drones_map`` and ``goals_map`` (1 = occupied).

    The environment reads the notebook-level configuration constants
    (``SIZE``, ``N_HUBS``, ``OBSERVATION_DIST``, ...) at call time.
    """

    def reset(self, model):
        """Build a fresh world and load the policy network.

        Args:
            model: Path of the saved model, forwarded to ``DQNAgent``.
        """
        self.terrain_map, self.hubs = self.generate_terrain()
        self.dep_times = self.generate_dep_times(TOTAL_TIME, MAX_LAMBDA)
        self.DQNagent = DQNAgent(model)

        free_cells = self.empty_blocks(self.terrain_map)
        self.agents = []
        self.drones_map = np.zeros((SIZE, SIZE, SIZE))
        self.goals_map = np.zeros((SIZE, SIZE, SIZE))

        # Dynamic obstacles start on random cells that are not solid terrain.
        self.dynamic_obs = [Point(*next(free_cells)) for _ in range(N_OBSTACLES)]
        self.obstacles_map = np.zeros((SIZE, SIZE, SIZE))
        for obs in self.dynamic_obs:
            self.obstacles_map[tuple(obs.location())] = 1

        # Keep track of the drone's trajectory
        self.paths = []

        # Keep track of the drone's rewards, and whether it is done
        self.rewards = [0] * len(self.dep_times)
        self.dones = [False] * len(self.dep_times)

        self.episode_step = 0

    def step(self):
        """Advance the simulation by one time step.

        Spawns the drones scheduled for this step (at most one per hub), moves
        every unfinished drone by one action, then drifts every dynamic
        obstacle.

        Returns:
            True when as many drones have been spawned as there are departure
            times (checked before this step's spawning) and none of them still
            had to move, or when ``episode_step`` has reached
            ``TOTAL_TIME + 100``; False otherwise.
        """
        self.episode_step += 1
        all_done = len(self.agents) == len(self.dep_times)

        departures = self.dep_times.count(self.episode_step)
        if departures:
            occupied = ((self.terrain_map == 1) | (self.obstacles_map == 1)
                        | (self.drones_map == 1) | (self.goals_map == 1))
            free_cells = self.empty_blocks(occupied)

            # Hubs are visited in random order; departures beyond the number
            # of hubs are dropped for this step.
            hub_order = [self.hubs[i] for i in np.random.permutation(N_HUBS)]
            for hub in hub_order[:departures]:
                start = Point(*hub, 0)
                end = Point(*next(free_cells))
                # Only accept goals whose L1 distance from the hub is exactly SIZE.
                while sum(abs(start.location() - end.location())) != SIZE:
                    end = Point(*next(free_cells))
                self.agents.append([start, end])
                self.paths.append([start.location()])
                self.drones_map[tuple(start.location())] = 1
                self.goals_map[tuple(end.location())] = 1

        terrain = self.terrain_map == 1
        obstacles = self.obstacles_map == 1

        for idx, agent in enumerate(self.agents):
            if self.dones[idx]:
                continue

            all_done = False

            # Drones that already moved this step are seen at their new cell.
            blocked = terrain | obstacles | (self.drones_map == 1)
            action = self._choose_action(agent, blocked)

            self.drones_map[tuple(agent[0].location())] = 0
            agent[0].action(action).within_bounds(SIZE, SIZE, SIZE)
            self.drones_map[tuple(agent[0].location())] = 1

            self.paths[idx].append(agent[0].location())

            if agent[0] == agent[1]:
                reward = GOAL_REWARD
                done = True
            else:
                reward = (-MOVE_PENALTY) * (1.0 + (HETERO_REWARD * 2.0 * self.terrain_map[tuple(agent[0].location())]))
                done = False

            self.dones[idx] = done
            self.rewards[idx] += reward

            if done:
                # A finished drone and its goal disappear from the maps.
                self.drones_map[tuple(agent[0].location())] = 0
                self.goals_map[tuple(agent[1].location())] = 0

        fixed = terrain | (self.drones_map == 1) | (self.goals_map == 1)

        for obs in self.dynamic_obs:
            # Obstacles moved earlier in this loop are seen at their new cell.
            blocked = fixed | (self.obstacles_map == 1)
            # NOTE(review): when the drifted cell is in bounds but occupied the
            # heading is NOT re-drawn; if Point.drift() is deterministic this
            # loop cannot terminate - confirm against agent.Point.
            while True:
                n = obs.copy()
                n.drift_heading = obs.drift_heading
                if n.drift().within_bounds(SIZE, SIZE, SIZE):
                    if not blocked[tuple(n.location())]:
                        break
                else:
                    obs.drift_heading = 2 * np.random.rand(3) - 1
                    continue
            self.obstacles_map[tuple(obs.location())] = 0
            obs.x, obs.y, obs.z = n.x, n.y, n.z
            self.obstacles_map[tuple(obs.location())] = 1

        if self.episode_step >= TOTAL_TIME + 100:
            all_done = True

        return all_done

    def _choose_action(self, agent, blocked):
        """Pick the action for one drone.

        With probability 0.95 actions are tried from highest to lowest
        Q-value, otherwise in random order. The first action whose target
        cell is not ``blocked`` wins; if every target is blocked the last
        action tried is returned.

        Args:
            agent: ``[position, goal]`` pair of points.
            blocked: Boolean grid of cells the drone must not enter.
        """
        current_state = self.generate_state(agent)
        if np.random.rand() < 0.95:
            split = self.DQNagent.terrain_size
            # Query the network once per move; it used to be re-evaluated for
            # every candidate action although the state does not change.
            q_values = self.DQNagent.get_qs([np.array([current_state[:split]]),
                                            np.array([current_state[split:]])])
            ranking = np.argsort(q_values)
            candidates = (ranking[-i - 1] for i in range(ACTION_SPACE_SIZE))
        else:
            candidates = np.random.permutation(ACTION_SPACE_SIZE)

        for action in candidates:
            n = agent[0].copy().action(action)
            n.within_bounds(SIZE, SIZE, SIZE)
            if not blocked[tuple(n.location())]:
                break
        return action

    def render(self, elev=60, azim=45, save=""):
        """Draw the current world as a 3-D voxel plot.

        Args:
            elev: Elevation passed to ``view_init``.
            azim: Azimuth passed to ``view_init``.
            save: If non-empty, path the figure is saved to before showing.
        """
        fig = plt.figure(figsize=(10, 10))
        ax = fig.add_subplot(projection='3d')
        ax.set_xlabel("x")
        ax.set_ylabel("y")
        ax.set_zlabel("z")
        ax.view_init(elev=elev, azim=azim)
        ax.grid(True)

        terrain = self.terrain_map == 1
        drones = self.drones_map == 1
        goals = self.goals_map == 1
        obstacles = self.obstacles_map == 1

        voxelarr = terrain | drones | goals | obstacles
        # Later assignments win where categories overlap.
        colors = np.empty(terrain.shape, dtype=object)
        colors[terrain] = '#7A88CCC0'
        colors[drones] = '#FFD65DC0'
        colors[goals] = '#607D3BC0'
        colors[obstacles] = '#FDA4BAC0'
        ax.voxels(voxelarr, facecolors=colors, shade=True)

        if save != "":
            plt.savefig(save)
        plt.show()
        # render() is called once per step; release the figure explicitly so
        # figures do not pile up under backends that keep them open.
        plt.close(fig)

    def generate_terrain(self):
        """Create the terrain grid and the hub locations.

        Returns:
            Tuple ``(terrain, hubs)``: ``terrain`` is a ``(SIZE, SIZE, SIZE)``
            float array (1 inside buildings and on hub cells; unless
            ``BINARY_ENV`` is set, also a ground-proximity gradient and a 0.5
            halo around each building); ``hubs`` is a list of ``N_HUBS``
            distinct ``(x, y)`` ground cells.
        """
        terrain = np.zeros((SIZE, SIZE, SIZE))
        hubs = []

        if GROUND_PROX_PENALTY and not BINARY_ENV:
            half = SIZE // 2
            for i in range(half):
                terrain[:, :, i] = GROUND_PROX_PENALTY * (half - i) / half

        # Hubs are drawn around the grid centre and marked solid on the
        # ground layer; a draw that lands on an existing hub is repeated.
        # NOTE(review): for SIZE < 10 the spread SIZE // 10 is 0, so a second
        # hub can never differ from the first - confirm SIZE >= 10 is assumed.
        for _ in range(N_HUBS):
            while True:
                sample = np.random.normal((SIZE-1)/2, SIZE//10, 2)
                # Clamp so a far-tail draw cannot index outside the grid
                # (or wrap around through a negative index).
                x, y = (int(v) for v in np.clip(sample, 0, SIZE - 1))
                if terrain[x, y, 0] != 1:
                    terrain[x, y, 0] = 1
                    hubs.append((x, y))
                    break

        for _ in range(N_BUILDINGS):
            while True:
                # Generate random numbers in intervals of 0.5
                x, y = np.random.randint(0, SIZE*2, 2) / 2
                # Generate random building height
                z = np.random.randint(0, MAX_HEIGHT)
                # Re-draw until the footprint does not hit an existing solid cell.
                if self._stamp_building(terrain, x, y, z):
                    break
        return terrain, hubs

    def _stamp_building(self, terrain, x, y, z):
        """Try to place one building of height ``z`` at ``(x, y)`` in ``terrain``.

        Returns:
            False (terrain untouched) if the footprint overlaps a solid cell,
            True after the building has been written.
        """
        x0, x1 = math.floor(x), math.ceil(x) + 1
        y0, y1 = math.floor(y), math.ceil(y) + 1

        if not np.all(terrain[x0:x1, y0:y1, 0:z] != 1):
            return False

        if not BINARY_ENV:
            # Clamp the halo start to 0: a start of -1 selects from the far
            # end of the axis and produced an empty slice, so buildings on
            # the grid edge silently got no halo.
            hx0, hy0 = max(x0 - 1, 0), max(y0 - 1, 0)
            terrain[hx0:x1 + 1, hy0:y1 + 1, 0:z + 1] = \
                terrain[hx0:x1 + 1, hy0:y1 + 1, 0:z + 1].clip(min=0.5)
        terrain[x0:x1, y0:y1, 0:z].fill(1)
        return True

    def generate_state(self, agent):
        """Return the flat observation for one drone.

        The observation is the cubic window of side ``1 + 2 * OBSERVATION_DIST``
        around the drone, taken from the element-wise maximum of terrain and
        obstacle maps (cells outside the grid read as 1), followed by
        ``agent[0].vector(agent[1])``.
        """
        x, y, z = tuple(agent[0].location())
        side = 1 + 2 * OBSERVATION_DIST
        # Padding by OBSERVATION_DIST shifts indices so that [x:x+side] is
        # centred on the drone.
        padded_terrain = np.pad(np.maximum(self.terrain_map, self.obstacles_map),
                                OBSERVATION_DIST, 'constant', constant_values=1)
        window = padded_terrain[x:x + side, y:y + side, z:z + side].flatten()
        return np.append(window, agent[0].vector(agent[1]))

    def empty_blocks(self, occupied):
        """Return an iterator over all cells whose value is not 1, shuffled.

        Args:
            occupied: Grid indexed as ``[x, y, z]``; only the first ``SIZE``
                entries along each axis are considered.
        """
        empty_blocks = np.argwhere(occupied[:SIZE, :SIZE, :SIZE] != 1).tolist()
        random.shuffle(empty_blocks)
        return iter(empty_blocks)

    def generate_dep_times(self, T, max_lambda):
        """Sample integer departure times in ``[0, T]``.

        Candidate times are generated with exponential gaps of mean
        ``1 / max_lambda`` and each is kept with probability
        ``1 - (t - T/2)^2 / (T^2/4)``.

        Note: ``step`` increments ``episode_step`` before matching, so a
        departure time of 0 is never dispatched.

        Returns:
            List of floored departure times in increasing order (duplicates
            possible).
        """
        # Rate function = max_lambda * (1 - (t - T/2)^2/(T^2/4))
        dep_times = []
        t = 0
        while True:
            t += np.random.exponential(1/max_lambda)
            if t > T:
                break
            if np.random.rand() <= (1 - (t - T/2)**2/(T**2/4)):
                dep_times.append(t)

        return [math.floor(dep_time) for dep_time in dep_times]

In [None]:
import csv

# Grid settings shared by every hub-and-spoke test below. These are
# notebook-level names that the environment reads at call time.
SIZE, N_BUILDINGS, N_OBSTACLES = 20, 50, 25
# MAX_HEIGHT was derived from SIZE in the config cell; keep it in sync.
MAX_HEIGHT = SIZE - 2

# Id of the first model in the trained series (the commented list below starts
# at 13). The original code derived OBSERVATION_DIST from the position in the
# *full* model list, cycling 1, 2, 3.
FIRST_MODEL_ID = 13

# render(save=...) writes into this folder; create it up front so savefig
# does not fail on a fresh checkout.
os.makedirs("visualisations", exist_ok=True)

# Each test is (MODEL_NAME, N_HUBS, MAX_LAMBDA).
for test in [("35",2,0.2),
         ("36",2,0.5),
         ("37",5,0.1),
         ("38",5,0.2),
         ("39",5,0.5),
         ("40",10,0.1),
         ("41",10,0.2),
         ("42",10,0.5)]:

    MODEL_NAME, N_HUBS, MAX_LAMBDA = test

    for model in [
#                 "13_10600____0.97max____0.87avg____0.12min.model",
#                  "14_14600____0.99max____0.80avg___-0.43min.model",
#                  "15_07200____0.99max____0.87avg____0.74min.model",
#                  "16_02600____0.97max____0.73avg___-1.06min.model",
#                  "17_03200____0.97max____0.65avg___-0.48min.model",
#                  "18_14000____0.98max____0.84avg___-0.26min.model",
                 "19_04400____0.96max____0.79avg___-0.30min.model"
#                  "20_10000____1.00max____0.72avg___-0.29min.model",
#                  "21_22400____0.98max____0.64avg___-0.43min.model",
#                  "22_01200____0.58max___-0.58avg___-1.30min.model",
#                  "23_11200____0.88max___-0.00avg___-1.00min.model",
#                  "24_10400____0.94max____0.21avg___-1.06min.model"
    ]:

        # Derive the observation distance from the model id instead of the
        # enumerate index, so it stays correct whichever subset of models is
        # uncommented (index-based lookup was only right by coincidence).
        OBSERVATION_DIST = ((int(model[:2]) - FIRST_MODEL_ID) % 3) + 1

        env = HASEnv()
#         print(f'\nTest_{model[:2]}-Model_{MODEL_NAME}.csv ', end='')

        step = 1

        env.reset("models/"+model)

        env.render(save=f"visualisations/{MODEL_NAME}_{step}.png")

        # One frame per simulation step until the environment reports completion.
        while not env.step():
            step += 1
            env.render(save=f"visualisations/{MODEL_NAME}_{step}.png")


#             if step >0 and step %100 == 0:
#                 print(".", end="")
#         rewards = env.rewards
#         lengths = [len(p) for p in env.paths]

#         with open(f'Test_{model[:2]}-Model_{MODEL_NAME}.csv', 'w', newline='') as file:
#             mywriter = csv.writer(file, delimiter=',')
#             for j in range(min(len(rewards), len(lengths))):
#                 mywriter.writerow((rewards[j], lengths[j]))