In [32]:
# import:
import random
import itertools
import math

# indexing:
STARTING = 0
DESTINATION = 1
ROW = 0
COLUMN = 1

# adjustable variables:
n_rows = 1                                                                          # number of rows of the street
n_cols = 17                                     # +2 for the "standing" slot        # number of columns of the street
num_of_pedestrian = 10                                                              # number of pedestrians
sl_coverage = 2                                                                     # street light coverage area (excluding the light)
md_coverage = 2                                                                     # motion detection coverage area (excluding the light)
brightness_lvl_lowest = 1                                                           # lowest brightness level
brightness_lvl_highest = 3                                                          # highest brightness level
nl_opt = [[1],[2],[3],[2],[1]]                                                      # natural light level (3 = darkest)
# bounded variables:
positions = [(0,(n_rows - 1)), (0,(n_cols - 1))]                                    # number of end points
tds = list(range(1, (num_of_pedestrian + 1)))                                      # list of time delay
brightness_lvl = list(range(brightness_lvl_lowest, (brightness_lvl_highest + 1)))   # list of brightness level


In [33]:
class PedestriansMaker():
    
    def __init__(self, n_rows, n_cols, positions, num_of_pedestrian, tds):
        self.n_rows = n_rows
        self.n_cols = n_cols
        self.positions = positions
        self.num_of_pedestrian = num_of_pedestrian
        self.tds = tds
        self.list_sd = self.create_starting_destination(self.positions)
        self.pedestrians_sd = self.create_pedestrians_sd(self.num_of_pedestrian, self.list_sd)
        self.pedestrians_s = self.pedestrians_sd[STARTING]
        self.pedestrians_d = self.pedestrians_sd[DESTINATION]
        self.pedestrians_pathway = self.create_pathway(self.pedestrians_s, self.num_of_pedestrian)
        self.pedestrians_td = self.create_pedestrians_td(self.pedestrians_s, self.tds)
  
    @staticmethod
    def create_starting_destination(positions):
        starting_pt = positions # since list mentioned in the init method, can we put self.positions?
        destination_pt = positions
        list_sd = []
        for starting, destination in itertools.product(starting_pt, destination_pt):
            if starting != destination:
                list_sd.append((starting, destination))
        return list_sd # return a list of tuple [starting destination option]

    @staticmethod
    def create_pedestrians_sd(num_of_pedestrian, list_sd):
        pedestrian_s = []
        pedestrian_d = []
        for idx_pedestrian_starting in range(num_of_pedestrian):
            pedestrian_sd = random.choice(list_sd)
            pedestrian_s.append(pedestrian_sd[STARTING])
            pedestrian_d.append(pedestrian_sd[DESTINATION])
            pedestrians_sd = [pedestrian_s, pedestrian_d]
        return pedestrians_sd # return list of tuple [starting][destination]
    
    @staticmethod
    def create_pathway(pedestrians_s, num_of_pedestrian):
        pedestrians_pathway = []
        for idx_pedestrian in range(num_of_pedestrian):
            pedestrian_s = pedestrians_s[idx_pedestrian]
            if pedestrian_s == (0,0): # will later generalize this
                row_update = 0
                col_update = 1
            else:
                row_update = 0
                col_update = -1
            pedestrians_pathway.append((row_update, col_update))
        return pedestrians_pathway # return the direction of a pedestrian

    @staticmethod
    def pedestrian_loc_update(list_current_pedestrian, pedestrians_pathway, pedestrians_td, pedestrians_d, time):
        updated_pedestrian_loc = []
        for idx_pedestrian in range(len(pedestrians_pathway)):
            pedestrian_loc = list_current_pedestrian[idx_pedestrian]
            pedestrian_row = pedestrian_loc[ROW]
            pedestrian_col = pedestrian_loc[COLUMN]
            pedestrian_pathway = pedestrians_pathway[idx_pedestrian]
            pedestrian_pathway_row = pedestrian_pathway[ROW]
            pedestrian_pathway_col = pedestrian_pathway[COLUMN]
            pedestrian_d = pedestrians_d[idx_pedestrian]
            pedestrian_td = pedestrians_td[idx_pedestrian]
            if pedestrian_loc != pedestrian_d:
                if pedestrian_td <= time:
                    update_pedestrian_row = pedestrian_row + pedestrian_pathway_row
                    update_pedestrian_col = pedestrian_col + pedestrian_pathway_col
                    updated_pedestrian_loc.append((update_pedestrian_row, update_pedestrian_col))
                else:
                    updated_pedestrian_loc.append((pedestrian_row, pedestrian_col))
            else:
                    updated_pedestrian_loc.append((pedestrian_row, pedestrian_col))
        return updated_pedestrian_loc # used to update the current location

    @staticmethod
    def create_pedestrians_td(pedestrians_s, tds):
        pedestrians_td = []
        for idx_pedestrian in range(len(pedestrians_s)):
            pedestrians_td.append(random.choice(tds))
        return pedestrians_td # return the list of pedestrians' time delay

In [34]:
print('test for pedestrian maker')
pm = PedestriansMaker(n_rows, n_cols, positions, num_of_pedestrian, tds)
print('starting destination combination = {}'.format(pm.list_sd))
print('pedestrian starting destination combination = {}'.format(pm.pedestrians_sd))
print('pedestrian starting = {}'.format(pm.pedestrians_s))
print('pedestrian destination = {}'.format(pm.pedestrians_d))
print('pedestrian pathway = {}'.format(pm.pedestrians_pathway))
print('pedestrian time delay = {}'.format(pm.pedestrians_td))

test for pedestrian maker
starting destination combination = [((0, 0), (0, 16)), ((0, 16), (0, 0))]
pedestrian starting destination combination = [[(0, 0), (0, 0), (0, 16), (0, 16), (0, 16), (0, 16), (0, 16), (0, 0), (0, 0), (0, 0)], [(0, 16), (0, 16), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 16), (0, 16), (0, 16)]]
pedestrian starting = [(0, 0), (0, 0), (0, 16), (0, 16), (0, 16), (0, 16), (0, 16), (0, 0), (0, 0), (0, 0)]
pedestrian destination = [(0, 16), (0, 16), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 16), (0, 16), (0, 16)]
pedestrian pathway = [(0, 1), (0, 1), (0, -1), (0, -1), (0, -1), (0, -1), (0, -1), (0, 1), (0, 1), (0, 1)]
pedestrian time delay = [3, 6, 1, 3, 5, 3, 5, 8, 6, 7]


In [35]:
class StreetLightMaker():
    def __init__(self, n_rows, n_cols, sl_coverage, md_coverage): # action will later be deleted ?
        self.n_rows = n_rows
        self.n_cols = n_cols
        self.sl_coverage = sl_coverage
        self.md_coverage = md_coverage
        self.sl_locs = self.create_sl(self.n_rows, self.n_cols, self.sl_coverage)
        self.sls_cvrg_area = self.create_sl_coverage_area(self.n_rows, self.n_cols, self.sl_coverage, self.sl_locs)
        self.mds_cvrg_area = self.create_motion_detection_area(self.n_rows, self.n_cols, self.md_coverage, self.sl_locs)

    @staticmethod
    def create_sl(n_rows, n_cols, sl_coverage):
        n_rows = n_rows - 1 # should later changed to 2 when there are more than 1 row
        n_cols = n_cols - 2
        list_sl_rows = list(range((sl_coverage+1), n_rows, (sl_coverage*2+1)))
        list_sl_cols = list(range((sl_coverage+1), n_cols, (sl_coverage*2+1)))
        
        if list_sl_rows == []:
            list_sl_rows = [0]
            
        if list_sl_cols == []:
            list_sl_cols = [0]

        sl_locs = []
        for row, col in itertools.product(list_sl_rows, list_sl_cols):
            sl_locs.append((row,col))
        
        return sl_locs # return a list of tuple [street light coordinate]

    @staticmethod
    def create_sl_coverage_area(n_rows, n_cols, sl_coverage, sl_locs):
        sls_cvrg_area = []
        for idx_sl in range(len(sl_locs)):
            sl_loc = sl_locs[idx_sl]
            sl_row = sl_loc[ROW]
            sl_col = sl_loc[COLUMN]
            coverage_row = list(range((sl_row - sl_coverage), (sl_row + sl_coverage + 1)))
            coverage_col = list(range((sl_col - sl_coverage), (sl_col + sl_coverage + 1)))
            rows = list(range(0, (n_rows))) # will be adjusted like the cols when there are more than 1 row
            cols = list(range(1, (n_cols-1)))
            coverage_row = list(x for x in coverage_row if x in rows)
            coverage_col = list(x for x in coverage_col if x in cols)
            coverage_coor = []
            for row, col in itertools.product(coverage_row, coverage_col):
                coverage_coor.append((row,col))
            sls_cvrg_area.append(coverage_coor)
        return sls_cvrg_area # return list of list of tuple

    @staticmethod
    def create_motion_detection_area(n_rows, n_cols, md_coverage, sl_locs):
        mds_cvrg_area = []
        for idx_sl in range(len(sl_locs)):
            sl_loc = sl_locs[idx_sl]
            sl_row = sl_loc[ROW]
            sl_col = sl_loc[COLUMN]
            coverage_row = list(range((sl_row - md_coverage), (sl_row + md_coverage + 1)))
            coverage_col = list(range((sl_col - md_coverage), (sl_col + md_coverage + 1)))
            rows = list(range(0, (n_rows))) # will be adjusted like the cols when there are more than 1 row
            cols = list(range(1, (n_cols-1)))
            coverage_row = list(x for x in coverage_row if x in rows)
            coverage_col = list(x for x in coverage_col if x in cols)
            coverage_coor = []
            for row, col in itertools.product(coverage_row, coverage_col):
                coverage_coor.append((row,col))
            mds_cvrg_area.append(coverage_coor)
        return mds_cvrg_area # return list of list of tuple


In [36]:
print('test for street light')
slm = StreetLightMaker(n_rows, n_cols, sl_coverage, md_coverage)
print('list of street light coordinate = {}'.format(slm.sl_locs))
print('coverage coordinate for each light = {}'.format(slm.sls_cvrg_area))
print('motion detection coverage area = {}'.format(slm.mds_cvrg_area))

test for street light
list of street light coordinate = [(0, 3), (0, 8), (0, 13)]
coverage coordinate for each light = [[(0, 1), (0, 2), (0, 3), (0, 4), (0, 5)], [(0, 6), (0, 7), (0, 8), (0, 9), (0, 10)], [(0, 11), (0, 12), (0, 13), (0, 14), (0, 15)]]
motion detection coverage area = [[(0, 1), (0, 2), (0, 3), (0, 4), (0, 5)], [(0, 6), (0, 7), (0, 8), (0, 9), (0, 10)], [(0, 11), (0, 12), (0, 13), (0, 14), (0, 15)]]


In [37]:
class Visualization():
    
    def __init__(self, n_rows, n_cols, positions, num_of_pedestrian, sl_coverage, md_coverage, action): # the action will be taken from Agent class
        self.n_rows = n_rows
        self.n_cols = n_cols
        self.positions = positions
        self.num_of_pedestrian = num_of_pedestrian
        self.tds = tds
        self.sl_coverage = sl_coverage
        self.md_coverage = md_coverage
        self.PM_class = PedestriansMaker(self.n_rows, self.n_cols, self.positions, self.num_of_pedestrian, self.tds)
        self.SLM_class = StreetLightMaker(self.n_rows, self.n_cols, self.sl_coverage, self.md_coverage)
        self.grid_pedestrians = self.create_location_grid(self.n_rows, self.n_cols, self.num_of_pedestrian, self.PM_class.pedestrians_s)
        self.grid_sls = self.create_sl_grid(self.n_rows, self.n_cols, self.SLM_class.sl_locs)
        self.grid_brightness = self.create_brightness_grid(action, self.n_rows, self.n_cols, self.SLM_class.sls_cvrg_area)

    @staticmethod
    def create_location_grid(n_rows, n_cols, num_of_pedestrian, pedestrians_s):
        grid_pedestrian = []
        for idx_row in range(n_rows):
            row = []
            for idx_column in range(n_cols):
                row.append(0)
            grid_pedestrian.append(row)
        for idx_pedestrian in range(num_of_pedestrian):
            pedestrian_s = pedestrians_s[idx_pedestrian]
            grid_pedestrian[pedestrian_s[ROW]][pedestrian_s[COLUMN]] += 1
        return grid_pedestrian # return the grid and each pedestrian position on the grid

    @staticmethod
    def create_sl_grid(n_rows, n_cols, sl_locs):
        grid_sl = []
        for idx_row in range(n_rows):
            row = []
            for idx_column in range(n_cols):
                row.append(0)
            grid_sl.append(row)
        for idx_sl in range(len(sl_locs)):
            sl_loc = sl_locs[idx_sl]
            grid_sl[sl_loc[ROW]][sl_loc[COLUMN]] = 1
        return grid_sl # return the grid and each street light position on the grid

    @staticmethod
    def create_brightness_grid(action, n_rows, n_cols, sls_cvrg_area):
        grid_sl_brightness = []
        assert len(action) == len(sls_cvrg_area)
        for idx_row in range(n_rows):
            row = []
            for idx_column in range(n_cols):
                row.append(0)
            grid_sl_brightness.append(row)
        for idx_action in range(len(action)):
            sl_cvrg = sls_cvrg_area[idx_action]
            sl_action = action[idx_action]
            for idx_cvrg in range(len(sl_cvrg)):
                cvrg_coor = sl_cvrg[idx_cvrg]
                cvrg_row = cvrg_coor[ROW]
                cvrg_col = cvrg_coor[COLUMN]
                if sl_action == 1: # will be generalized
                    grid_sl_brightness[cvrg_row][cvrg_col] = 1
                elif sl_action == 2: # will be generalized
                    grid_sl_brightness[cvrg_row][cvrg_col] = 2
                else: # will be generalized
                    grid_sl_brightness[cvrg_row][cvrg_col] = 3
        return grid_sl_brightness # return grid for brightness

In [38]:
print('test for visualization')
action = agent.action(obs, agent.SLM_class.sl_locs, brightness_lvl)
obs = env.reset() # observation required to test the agent
vis = Visualization(n_rows, n_cols, positions, num_of_pedestrian, sl_coverage, md_coverage, action)
print('pedestrian location grid = {}'.format(vis.grid_pedestrians))
print('street light on grid = {}'.format(vis.grid_sls))
print('brightness grid = {}'.format(vis.grid_brightness))

test for visualization
pedestrian location grid = [[3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7]]
street light on grid = [[0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0]]
brightness grid = [[0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 0]]


In [67]:
class Environment():

    def __init__(self, n_rows, n_cols, positions, num_of_pedestrian, tds, sl_coverage, md_coverage, nl_opt):
        self.time = 0
        self.n_rows = n_rows
        self.n_cols = n_cols
        self.positions = positions
        self.num_of_pedestrian = num_of_pedestrian
        self.tds = tds
        self.sl_coverage = sl_coverage
        self.md_coverage = md_coverage
        self.nl_opt = nl_opt
        self.PM_class = PedestriansMaker(self.n_rows, self.n_cols, self.positions, self.num_of_pedestrian, self.tds)
        self.SLM_class = StreetLightMaker(self.n_rows, self.n_cols, self.sl_coverage, self.md_coverage)
        self.list_current_pedestrian = self.PM_class.pedestrians_s.copy()
        self.nl = self.natural_light(self.nl_opt)

    @staticmethod
    def natural_light(nl_opt):
        period_length = n_cols + max(tds) - 1
        period_1 = 1 + math.floor(period_length/len(nl_opt))
        period_2 = period_4 = period_5 = math.floor(period_length/len(nl_opt))
        period_3 = period_length - period_1 - period_2 - period_4 - period_5
        period = [period_1, period_2, period_3, period_4, period_5]
        nl = []
        for idx_nl in range(len(nl_opt)):
            nl.append(nl_opt[idx_nl] * period[idx_nl])
        flat_nl = [item for items in nl for item in items]
        return flat_nl

    @staticmethod
    def pedestrian_count(sls_cvrg_area, list_current_pedestrian):
        sl_pedestrian_count = []
        for idx_sl in range(len(sls_cvrg_area)):
            sl_cvrg_area = sls_cvrg_area[idx_sl]
            pedestrian_count = 0
            for idx_pedestrian in range(len(list_current_pedestrian)):
                current_pedestrian_loc = list_current_pedestrian[idx_pedestrian]
                test_coor = sl_cvrg_area.count((current_pedestrian_loc[ROW], current_pedestrian_loc[COLUMN]))
                pedestrian_count = pedestrian_count + test_coor
            if pedestrian_count == 0: 
                pedestrian_count = 1
            elif pedestrian_count >= 3: # will be generalized
                pedestrian_count = 3
            sl_pedestrian_count.append(pedestrian_count)
        return sl_pedestrian_count

    @staticmethod
    def obs(sls_cvrg_area, list_current_pedestrian):
        list_pedestrian_detected = []
        for idx_sl in range(len(sls_cvrg_area)):
            sl_cvrg_area = sls_cvrg_area[idx_sl]
            test_coor = 0
            for idx_pedestrian in range(len(list_current_pedestrian)):
                current_pedestrian_loc = list_current_pedestrian[idx_pedestrian]
                pedestrian_count = sl_cvrg_area.count((current_pedestrian_loc[ROW], current_pedestrian_loc[COLUMN]))
                test_coor = test_coor + pedestrian_count
            if test_coor == 0:
                list_pedestrian_detected.append(test_coor)
            else:
                list_pedestrian_detected.append(1)
        return list_pedestrian_detected
    
    @staticmethod
    def reward(action, sl_pedestrian_count, nl, time):
        reward_sl = []
        for idx_sl in range(len(action)):
            sl_action = action[idx_sl]
            pedestrian_count = sl_pedestrian_count[idx_sl]
            if sl_action == pedestrian_count:
                if sl_action < nl[time]:
                    sl_reward = sl_action - nl[time]
                else:
                    sl_reward = 0
            elif sl_action < pedestrian_count:
                if sl_action < nl[time]:
                    sl_reward = (2*sl_action) - pedestrian_count - nl[time]
                else:
                    sl_reward = sl_action - pedestrian_count
            else:
                if sl_action < nl[time]:
                    sl_reward = pedestrian_count - nl[time]
                else:
                    sl_reward = pedestrian_count - sl_action
            reward_sl.append(sl_reward)
        reward = sum(reward_sl)
        return reward

    @staticmethod
    def done(list_current_pedestrian, pedestrians_d):
        status = []
        for idx_pedestrian in range(len(list_current_pedestrian)):
            current_pedestrian_loc = list_current_pedestrian[idx_pedestrian]
            pedestrian_d = pedestrians_d[idx_pedestrian]
            if current_pedestrian_loc == pedestrian_d:
                status.append(True)
            else:
                status.append(False)
        done = all(status)
        return done

    @staticmethod
    def info():
        pass

    def reset(self):
        self.time = 0
        self.PM_class = PedestriansMaker(self.n_rows, self.n_cols, self.positions, self.num_of_pedestrian, self.tds)
        self.SLM_class = StreetLightMaker(self.n_rows, self.n_cols, self.sl_coverage, self.md_coverage)
        self.list_current_pedestrian = self.PM_class.pedestrians_s.copy()
        list_pedestrian_detected = self.obs(self.SLM_class.sls_cvrg_area, self.list_current_pedestrian)
        return list_pedestrian_detected

    def step(self, action):
        self.time = self.time + 1
        self.list_current_pedestrian = self.PM_class.pedestrian_loc_update(self.list_current_pedestrian, self.PM_class.pedestrians_pathway, self.PM_class.pedestrians_td, self.PM_class.pedestrians_d, self.time)
        list_pedestrian_detected = self.obs(self.SLM_class.sls_cvrg_area, self.list_current_pedestrian)
        done = self.done(self.list_current_pedestrian, self.PM_class.pedestrians_d)
        reward = self.reward(action, self.pedestrian_count(self.SLM_class.sls_cvrg_area, self.list_current_pedestrian), self.nl, self.time)
        info = {
            'time' : self.time,
            'action' : action,
            'natural light' : self.nl[self.time],
            'sl pedestrian count' : self.pedestrian_count(self.SLM_class.sls_cvrg_area, self.list_current_pedestrian),
            'curent pedestrian loc' : self.list_current_pedestrian
        }
        return list_pedestrian_detected, done, reward, info # info will be added later

    # def render(self):
    #     print(self.grid_pedestrian)


In [68]:
print('test for environment')
env = Environment(n_rows, n_cols, positions, num_of_pedestrian, tds, sl_coverage, md_coverage, nl_opt)
print('reset = {}'.format(env.reset()))
obs = env.reset()
action = agent.action(obs, agent.SLM_class.sl_locs, brightness_lvl)
print('pedestrian starting pt : {}'.format(env.list_current_pedestrian))
print('pedestrian time delay : {}'.format(env.PM_class.pedestrians_td))
print('natural light time : {}'.format(env.nl))

test for environment
reset = [0, 0, 0]


TypeError: Agent.action() missing 1 required positional argument: 'brightness_lvl'

In [12]:
# agent 1: random action
class Agent():
    
    def __init__(self, n_rows, n_cols, sl_coverage, md_coverage, brightness_lvl):
        self.n_rows = n_rows
        self.n_cols = n_cols
        self.sl_coverage = sl_coverage
        self.md_coverage = md_coverage
        self.brightness_lvl = brightness_lvl
        self.SLM_class = StreetLightMaker(self.n_rows, self.n_cols, self.sl_coverage, self.md_coverage)

    @staticmethod
    def action(obs, sl_locs, brightness_lvl):
        action = []
        for idx_sl in range(len(sl_locs)):
            brightness = random.choice(brightness_lvl)
            action.append(brightness)
        return action

In [13]:
print('test for agent')
agent = Agent(n_rows, n_cols, sl_coverage, md_coverage, brightness_lvl)
obs = env.reset() # observation required to test the agent
print('action = {}'.format(agent.action(obs, agent.SLM_class.sl_locs, brightness_lvl)))  

test for agent
action = [1, 3, 3]


In [14]:
print('final test')
obs = env.reset()
done = False
total_reward = 0
while not done:
    action = agent.action(obs, agent.SLM_class.sl_locs, brightness_lvl)
    obs, done, reward, info = env.step(action)
    print('obs : {}'.format(obs))
    print('done : {}'.format(done))
    print('reward : {}'.format(reward))
    print('info : {}'.format(info))
    total_reward += sum(reward)
    print('total reward : {}'.format(total_reward))

final test
obs : [0, 0, 0]
done : False
reward : [0, -1, -2]
info : {'time': 1, 'action': [1, 2, 3], 'natural light': 1, 'sl pedestrian count': [1, 1, 1], 'curent pedestrian loc': [(0, 16), (0, 16), (0, 0), (0, 0), (0, 16), (0, 0), (0, 0), (0, 0), (0, 16), (0, 16)]}
total reward : -3
obs : [1, 0, 0]
done : False
reward : [-1, -2, 0]
info : {'time': 2, 'action': [3, 3, 1], 'natural light': 1, 'sl pedestrian count': [2, 1, 1], 'curent pedestrian loc': [(0, 16), (0, 16), (0, 0), (0, 0), (0, 16), (0, 1), (0, 1), (0, 0), (0, 16), (0, 16)]}
total reward : -6
obs : [1, 0, 0]
done : False
reward : [-1, 0, -2]
info : {'time': 3, 'action': [3, 1, 3], 'natural light': 1, 'sl pedestrian count': [2, 1, 1], 'curent pedestrian loc': [(0, 16), (0, 16), (0, 0), (0, 0), (0, 16), (0, 2), (0, 2), (0, 0), (0, 16), (0, 16)]}
total reward : -9
obs : [1, 0, 0]
done : False
reward : [-1, 0, 0]
info : {'time': 4, 'action': [2, 1, 1], 'natural light': 1, 'sl pedestrian count': [3, 1, 1], 'curent pedestrian loc':

In [None]:
# agent 2: all 3 action
class Agent():
    
    def __init__(self, n_rows, n_cols, sl_coverage, md_coverage, brightness_lvl):
        self.n_rows = n_rows
        self.n_cols = n_cols
        self.sl_coverage = sl_coverage
        self.md_coverage = md_coverage
        self.brightness_lvl = brightness_lvl
        self.SLM_class = StreetLightMaker(self.n_rows, self.n_cols, self.sl_coverage, self.md_coverage)

    @staticmethod
    def action(obs, sl_locs, brightness_lvl):
        action = []
        for idx_sl in range(len(sl_locs)):
            brightness = 3
            action.append(brightness)
        return action


In [88]:
# agent 3: with q-learning
import itertools
from os import stat
import numpy as np

# variables
episodes = 100      # total number of episodes
alpha = 0.5         # learning rate
gamma = 0.9         # discount factor

class Agent():
    
    def __init__(self, n_rows, n_cols, sl_coverage, md_coverage, brightness_lvl):
        self.n_rows = n_rows
        self.n_cols = n_cols
        self.sl_coverage = sl_coverage
        self.md_coverage = md_coverage
        self.brightness_lvl = brightness_lvl
        self.SLM_class = StreetLightMaker(self.n_rows, self.n_cols, self.sl_coverage, self.md_coverage)
        self.state_space = 2 ** len(self.SLM_class.sl_locs)
        self.action_space = 3 ** len(self.SLM_class.sl_locs)
        self.qtable = np.zeros([self.state_space, self.action_space])
    
    @staticmethod
    def state_idx(state):
        state_bin = ''.join(map(str,state))
        state_idx = 0
        for idx in range(len(state_bin)):
            i = len(state_bin) - 1 - idx
            state_idx += int(state_bin[i]) * (2 ** idx)
        return state_idx

    @staticmethod
    def idx_state(state_idx):
        state = list(bin(state_idx))
        del state[0:2]
        state = list(map(int, state))
        return state

    @staticmethod
    def action_idx(action):
        action_copy = np.array(action)
        action_copy.fill(1)
        action_update = action - action_copy
        action_ter = ''.join(map(str, action_update))
        action_idx = 0
        for idx in range(len(action_ter)):
            i = len(action_ter) - 1 - idx
            action_idx += int(action_ter[i]) * (3 ** idx)
        return action_idx

    @staticmethod
    def idx_action(action_idx):
        action_ter = []
        done = False
        current_idx = action_idx
        while not done:
            reminder = current_idx % 3
            action_ter.append(reminder)
            current_idx = current_idx // 3
            if current_idx == 0:
                done = True
        action_ter.reverse()
        action_copy = np.array(action_ter)
        action_copy.fill(1)
        action = action_ter + action_copy
        return list(action)
        
    def learn(self, qtable, obs, action, alpha, gamma, reward, new_obs):
        state_idx = self.state_idx(obs)
        action_idx = self.action_idx(action)
        new_state_idx = self.state_idx(new_obs)
        old_value = qtable[state_idx, action_idx]
        qtable[state_idx, action_idx] = old_value + (alpha * (reward + (gamma * np.max(qtable[new_state_idx, :])) - old_value))
        return qtable

    def action(self, qtable, obs, sl_locs, brightness_lvl):
        state_idx = self.state_idx(obs)
        flat_qtable = np.ravel(qtable)
        if np.all(flat_qtable == flat_qtable[0]):
            action = []
            for idx_sl in range(len(sl_locs)):
                brightness = random.choice(brightness_lvl)
                action.append(brightness)
        else:
            action_idx = np.argmax(qtable[state_idx])
            action = self.idx_action(action_idx)
        return action

    

        

# print('qtable before training:')
# print(qtable)

# state = env.reset()
# state_str = ''.join(map(str,state))
# idx_state = 0
# for idx in range(len(state_str)):
#     i = len(state_str) - 1 - idx
#     idx_state += int(state_str[i]) * (2 ** idx)

# # training
# for idx_episodes in range(episodes):
#     state = env.reset()
#     done = False
#     outcomes.append("Failure")
#     while not done:
#         state_str = ''.join(map(str,state))
#         idx_state = 0
#         for idx in range(len(state_str)):
#             i = len(state_str) - 1 - idx
#             idx_state += int(state_str[i]) * (2 ** idx)
        
#         if np.min(qtable[idx_state]) < 0:
#             action = np.argmax(qtable[idx_state])
#         else:
#             action = agent.action(state, agent.SLM_class.sl_locs, brightness_lvl)
#         new_state, reward, done, info = env.step(action)
#         state = new_state
#         total_reward += reward

# avg_reward = total_reward / episodes
# print('average reward : {}'.format(avg_reward))


In [99]:
agent = Agent(n_rows, n_cols, sl_coverage, md_coverage, brightness_lvl)
qtable = agent.qtable
print('final test')
obs = env.reset()
done = False
total_reward = 0
for i in range(episodes):
    # for i in range(2):
    while not done:
        print(qtable[agent.state_idx(obs)])
        action = agent.action(qtable, obs, agent.SLM_class.sl_locs, brightness_lvl)
        new_obs, done, reward, info = env.step(action)
        print('obs : {}'.format(obs))
        print('done : {}'.format(done))
        print('reward : {}'.format(reward))
        print('info : {}'.format(info))
        total_reward += reward
        print('total reward : {}'.format(total_reward))
        qtable = agent.learn(qtable, obs, action, alpha, gamma, reward, new_obs)
        print(qtable[agent.state_idx(new_obs)])
        obs = new_obs



final test
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0.]
obs : [0, 0, 0]
done : False
reward : -3
info : {'time': 1, 'action': [3, 3, 1], 'natural light': 1, 'sl pedestrian count': [2, 1, 1], 'curent pedestrian loc': [(0, 0), (0, 16), (0, 16), (0, 16), (0, 0), (0, 0), (0, 1), (0, 16), (0, 1), (0, 16)]}
total reward : -3
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0.]
obs : [1, 0, 0]
done : False
reward : -1
info : {'time': 2, 'action': [1], 'natural light': 1, 'sl pedestrian count': [2, 1, 1], 'curent pedestrian loc': [(0, 0), (0, 16), (0, 16), (0, 15), (0, 0), (0, 0), (0, 2), (0, 16), (0, 2), (0, 16)]}
total reward : -4
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0.]
obs : [1, 0, 1]
done : False
reward : -2
info

In [103]:
agent = Agent(n_rows, n_cols, sl_coverage, md_coverage, brightness_lvl)
qtable = agent.qtable
print('final test')
obs = env.reset()
done = False
total_reward = 0
for i in range(episodes):
    # for i in range(2):
    while not done:
        action = agent.action(qtable, obs, agent.SLM_class.sl_locs, brightness_lvl)
        new_obs, done, reward, info = env.step(action)
        qtable = agent.learn(qtable, obs, action, alpha, gamma, reward, new_obs)
        obs = new_obs
print(qtable)

final test
[[ 0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  -1.5
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0. ]
 [ 0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0. ]
 [ 0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0. ]
 [ 0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0. ]
 [ 0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0. ]
 [-1.  -0.5 -0.5 -0.5 -1.  -1.5  0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0. ]
 [ 0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0. ]
 [-1.5 -0.5 -1.

In [81]:
state_space = 4
action_space = 9
qtable = np.zeros((state_space, action_space))

print(qtable[2,3])


0.0


In [30]:
agent = Agent(n_rows, n_cols, sl_coverage, md_coverage, brightness_lvl)
a = [1,1,1,0,0,1]
b = 3
c = [3,1,2,1]
state_idx = agent.state_idx(a)
state = agent.idx_state(b, agent.SLM_class.sl_locs)
action_idx = agent.action_idx(c)
action = agent.idx_action(b, agent.SLM_class.sl_locs)
print(state_idx)
print(state)
print(action_idx)
print(action)

[0. 0. 0.] [1, 1]
57
[0. 1. 1.]
57
[1, 2, 1]


In [91]:
action_idx = 19

action_ter = []
done = False
current_idx = action_idx
reminder = current_idx % 3
action_ter.append(reminder)
current_idx = current_idx // 3
print(current_idx)

# while not done:
#     reminder = current_idx % 3
#     action_ter.append(reminder)
#     current_idx = current_idx // 3
#     if current_idx == 0:
#         done = True

6


In [49]:
a = [2,3,1,3,1,3,1,1]
b = np.array(a)
b.fill(1)
a = a - b
c = ''.join(map(str, a))
print(c)

12020200


In [11]:
a = 75
bin_a = bin(a)
print(bin_a)

0b1001011


In [20]:
print(int(b,2))

155


In [41]:
state_str = 15
state = list(bin(state_str))
del state[0:2]
state = list(map(int, state))
print(state)

[1, 1, 1, 1]


In [29]:
# import:
import random
import itertools
import math

# indexing:
STARTING = 0
DESTINATION = 1
ROW = 0
COLUMN = 1

# adjustable variables:
n_rows = 1                                                                          # number of rows of the street
n_cols = 11                                     # +2 for the "standing" slot        # number of columns of the street
num_of_pedestrian = 2                                                              # number of pedestrians
sl_coverage = 1                                                                     # street light coverage area (excluding the light)
md_coverage = 1                                                                     # motion detection coverage area (excluding the light)
brightness_lvl_lowest = 1                                                           # lowest brightness level
brightness_lvl_highest = 3                                                          # highest brightness level
nl_opt = [[1],[2],[3],[2],[1]]                                                      # natural light level (3 = darkest)
# bounded variables:
positions = [(0,(n_rows - 1)), (0,(n_cols - 1))]                                    # number of end points
tds = list(range(1, (num_of_pedestrian + 1)))                                      # list of time delay
brightness_lvl = list(range(brightness_lvl_lowest, (brightness_lvl_highest + 1)))   # list of brightness level

class PedestriansMaker():
    
    def __init__(self, n_rows, n_cols, positions, num_of_pedestrian, tds):
        self.n_rows = n_rows
        self.n_cols = n_cols
        self.positions = positions
        self.num_of_pedestrian = num_of_pedestrian
        self.tds = tds
        self.list_sd = self.create_starting_destination(self.positions)
        self.pedestrians_sd = self.create_pedestrians_sd(self.num_of_pedestrian, self.list_sd)
        self.pedestrians_s = self.pedestrians_sd[STARTING]
        self.pedestrians_d = self.pedestrians_sd[DESTINATION]
        self.pedestrians_pathway = self.create_pathway(self.pedestrians_s, self.num_of_pedestrian)
        self.pedestrians_td = self.create_pedestrians_td(self.pedestrians_s, self.tds)
  
    @staticmethod
    def create_starting_destination(positions):
        starting_pt = positions # since list mentioned in the init method, can we put self.positions?
        destination_pt = positions
        list_sd = []
        for starting, destination in itertools.product(starting_pt, destination_pt):
            if starting != destination:
                list_sd.append((starting, destination))
        return list_sd # return a list of tuple [starting destination option]

    @staticmethod
    def create_pedestrians_sd(num_of_pedestrian, list_sd):
        pedestrian_s = []
        pedestrian_d = []
        for idx_pedestrian_starting in range(num_of_pedestrian):
            pedestrian_sd = random.choice(list_sd)
            pedestrian_s.append(pedestrian_sd[STARTING])
            pedestrian_d.append(pedestrian_sd[DESTINATION])
            pedestrians_sd = [pedestrian_s, pedestrian_d]
        return pedestrians_sd # return list of tuple [starting][destination]
    
    @staticmethod
    def create_pathway(pedestrians_s, num_of_pedestrian):
        pedestrians_pathway = []
        for idx_pedestrian in range(num_of_pedestrian):
            pedestrian_s = pedestrians_s[idx_pedestrian]
            if pedestrian_s == (0,0): # will later generalize this
                row_update = 0
                col_update = 1
            else:
                row_update = 0
                col_update = -1
            pedestrians_pathway.append((row_update, col_update))
        return pedestrians_pathway # return the direction of a pedestrian

    @staticmethod
    def pedestrian_loc_update(list_current_pedestrian, pedestrians_pathway, pedestrians_td, pedestrians_d, time):
        updated_pedestrian_loc = []
        for idx_pedestrian in range(len(pedestrians_pathway)):
            pedestrian_loc = list_current_pedestrian[idx_pedestrian]
            pedestrian_row = pedestrian_loc[ROW]
            pedestrian_col = pedestrian_loc[COLUMN]
            pedestrian_pathway = pedestrians_pathway[idx_pedestrian]
            pedestrian_pathway_row = pedestrian_pathway[ROW]
            pedestrian_pathway_col = pedestrian_pathway[COLUMN]
            pedestrian_d = pedestrians_d[idx_pedestrian]
            pedestrian_td = pedestrians_td[idx_pedestrian]
            if pedestrian_loc != pedestrian_d:
                if pedestrian_td <= time:
                    update_pedestrian_row = pedestrian_row + pedestrian_pathway_row
                    update_pedestrian_col = pedestrian_col + pedestrian_pathway_col
                    updated_pedestrian_loc.append((update_pedestrian_row, update_pedestrian_col))
                else:
                    updated_pedestrian_loc.append((pedestrian_row, pedestrian_col))
            else:
                    updated_pedestrian_loc.append((pedestrian_row, pedestrian_col))
        return updated_pedestrian_loc # used to update the current location

    @staticmethod
    def create_pedestrians_td(pedestrians_s, tds):
        pedestrians_td = []
        for idx_pedestrian in range(len(pedestrians_s)):
            pedestrians_td.append(random.choice(tds))
        return pedestrians_td # return the list of pedestrians' time delay

class StreetLightMaker():
    def __init__(self, n_rows, n_cols, sl_coverage, md_coverage): # action will later be deleted ?
        self.n_rows = n_rows
        self.n_cols = n_cols
        self.sl_coverage = sl_coverage
        self.md_coverage = md_coverage
        self.sl_locs = self.create_sl(self.n_rows, self.n_cols, self.sl_coverage)
        self.sls_cvrg_area = self.create_sl_coverage_area(self.n_rows, self.n_cols, self.sl_coverage, self.sl_locs)
        self.mds_cvrg_area = self.create_motion_detection_area(self.n_rows, self.n_cols, self.md_coverage, self.sl_locs)

    @staticmethod
    def create_sl(n_rows, n_cols, sl_coverage):
        n_rows = n_rows - 1 # should later changed to 2 when there are more than 1 row
        n_cols = n_cols - 2
        list_sl_rows = list(range((sl_coverage+1), n_rows, (sl_coverage*2+1)))
        list_sl_cols = list(range((sl_coverage+1), n_cols, (sl_coverage*2+1)))
        
        if list_sl_rows == []:
            list_sl_rows = [0]
            
        if list_sl_cols == []:
            list_sl_cols = [0]

        sl_locs = []
        for row, col in itertools.product(list_sl_rows, list_sl_cols):
            sl_locs.append((row,col))
        
        return sl_locs # return a list of tuple [street light coordinate]

    @staticmethod
    def create_sl_coverage_area(n_rows, n_cols, sl_coverage, sl_locs):
        sls_cvrg_area = []
        for idx_sl in range(len(sl_locs)):
            sl_loc = sl_locs[idx_sl]
            sl_row = sl_loc[ROW]
            sl_col = sl_loc[COLUMN]
            coverage_row = list(range((sl_row - sl_coverage), (sl_row + sl_coverage + 1)))
            coverage_col = list(range((sl_col - sl_coverage), (sl_col + sl_coverage + 1)))
            rows = list(range(0, (n_rows))) # will be adjusted like the cols when there are more than 1 row
            cols = list(range(1, (n_cols-1)))
            coverage_row = list(x for x in coverage_row if x in rows)
            coverage_col = list(x for x in coverage_col if x in cols)
            coverage_coor = []
            for row, col in itertools.product(coverage_row, coverage_col):
                coverage_coor.append((row,col))
            sls_cvrg_area.append(coverage_coor)
        return sls_cvrg_area # return list of list of tuple

    @staticmethod
    def create_motion_detection_area(n_rows, n_cols, md_coverage, sl_locs):
        mds_cvrg_area = []
        for idx_sl in range(len(sl_locs)):
            sl_loc = sl_locs[idx_sl]
            sl_row = sl_loc[ROW]
            sl_col = sl_loc[COLUMN]
            coverage_row = list(range((sl_row - md_coverage), (sl_row + md_coverage + 1)))
            coverage_col = list(range((sl_col - md_coverage), (sl_col + md_coverage + 1)))
            rows = list(range(0, (n_rows))) # will be adjusted like the cols when there are more than 1 row
            cols = list(range(1, (n_cols-1)))
            coverage_row = list(x for x in coverage_row if x in rows)
            coverage_col = list(x for x in coverage_col if x in cols)
            coverage_coor = []
            for row, col in itertools.product(coverage_row, coverage_col):
                coverage_coor.append((row,col))
            mds_cvrg_area.append(coverage_coor)
        return mds_cvrg_area # return list of list of tuple


class Environment():

    def __init__(self, n_rows, n_cols, positions, num_of_pedestrian, tds, sl_coverage, md_coverage, nl_opt):
        self.time = 0
        self.n_rows = n_rows
        self.n_cols = n_cols
        self.positions = positions
        self.num_of_pedestrian = num_of_pedestrian
        self.tds = tds
        self.sl_coverage = sl_coverage
        self.md_coverage = md_coverage
        self.nl_opt = nl_opt
        self.PM_class = PedestriansMaker(self.n_rows, self.n_cols, self.positions, self.num_of_pedestrian, self.tds)
        self.SLM_class = StreetLightMaker(self.n_rows, self.n_cols, self.sl_coverage, self.md_coverage)
        self.list_current_pedestrian = self.PM_class.pedestrians_s.copy()
        self.nl = self.natural_light(self.nl_opt)

    @staticmethod
    def natural_light(nl_opt):
        period_length = n_cols + max(tds)
        period_1 = 1 + math.floor(period_length/len(nl_opt))
        period_2 = period_4 = period_5 = math.floor(period_length/len(nl_opt))
        period_3 = period_length - period_1 - period_2 - period_4 - period_5
        period = [period_1, period_2, period_3, period_4, period_5]
        nl = []
        for idx_nl in range(len(nl_opt)):
            nl.append(nl_opt[idx_nl] * period[idx_nl])
        flat_nl = [item for items in nl for item in items]
        return flat_nl

    @staticmethod
    def pedestrian_count(sls_cvrg_area, list_current_pedestrian):
        sl_pedestrian_count = []
        for idx_sl in range(len(sls_cvrg_area)):
            sl_cvrg_area = sls_cvrg_area[idx_sl]
            pedestrian_count = 0
            for idx_pedestrian in range(len(list_current_pedestrian)):
                current_pedestrian_loc = list_current_pedestrian[idx_pedestrian]
                test_coor = sl_cvrg_area.count((current_pedestrian_loc[ROW], current_pedestrian_loc[COLUMN]))
                pedestrian_count = pedestrian_count + test_coor
            if pedestrian_count == 0: 
                pedestrian_count = 1
            elif pedestrian_count >= 3: # will be generalized
                pedestrian_count = 3
            sl_pedestrian_count.append(pedestrian_count)
        return sl_pedestrian_count

    @staticmethod
    def obs(sls_cvrg_area, list_current_pedestrian):
        list_pedestrian_detected = []
        for idx_sl in range(len(sls_cvrg_area)):
            sl_cvrg_area = sls_cvrg_area[idx_sl]
            test_coor = 0
            for idx_pedestrian in range(len(list_current_pedestrian)):
                current_pedestrian_loc = list_current_pedestrian[idx_pedestrian]
                pedestrian_count = sl_cvrg_area.count((current_pedestrian_loc[ROW], current_pedestrian_loc[COLUMN]))
                test_coor = test_coor + pedestrian_count
            if test_coor == 0:
                list_pedestrian_detected.append(test_coor)
            else:
                list_pedestrian_detected.append(1)
        return list_pedestrian_detected
    
    @staticmethod
    def reward(action, sl_pedestrian_count, nl, time):
        reward_sl = []
        for idx_sl in range(len(action)):
            sl_action = action[idx_sl]
            pedestrian_count = sl_pedestrian_count[idx_sl]
            if sl_action == pedestrian_count:
                if sl_action < nl[time]:
                    sl_reward = sl_action - nl[time]
                else:
                    sl_reward = 0
            elif sl_action < pedestrian_count:
                if sl_action < nl[time]:
                    sl_reward = (2*sl_action) - pedestrian_count - nl[time]
                else:
                    sl_reward = sl_action - pedestrian_count
            else:
                if sl_action < nl[time]:
                    sl_reward = pedestrian_count - nl[time]
                else:
                    sl_reward = pedestrian_count - sl_action
            reward_sl.append(sl_reward)
        print(reward_sl)
        reward = sum(reward_sl)
        return reward

    @staticmethod
    def done(list_current_pedestrian, pedestrians_d):
        status = []
        for idx_pedestrian in range(len(list_current_pedestrian)):
            current_pedestrian_loc = list_current_pedestrian[idx_pedestrian]
            pedestrian_d = pedestrians_d[idx_pedestrian]
            if current_pedestrian_loc == pedestrian_d:
                status.append(True)
            else:
                status.append(False)
        done = all(status)
        return done

    @staticmethod
    def info():
        pass

    def reset(self):
        self.time = 0
        self.PM_class = PedestriansMaker(self.n_rows, self.n_cols, self.positions, self.num_of_pedestrian, self.tds)
        self.SLM_class = StreetLightMaker(self.n_rows, self.n_cols, self.sl_coverage, self.md_coverage)
        self.list_current_pedestrian = self.PM_class.pedestrians_s.copy()
        list_pedestrian_detected = self.obs(self.SLM_class.sls_cvrg_area, self.list_current_pedestrian)
        return list_pedestrian_detected

    def step(self, action):
        self.time = self.time + 1
        self.list_current_pedestrian = self.PM_class.pedestrian_loc_update(self.list_current_pedestrian, self.PM_class.pedestrians_pathway, self.PM_class.pedestrians_td, self.PM_class.pedestrians_d, self.time)
        list_pedestrian_detected = self.obs(self.SLM_class.sls_cvrg_area, self.list_current_pedestrian)
        done = self.done(self.list_current_pedestrian, self.PM_class.pedestrians_d)
        reward = self.reward(action, self.pedestrian_count(self.SLM_class.sls_cvrg_area, self.list_current_pedestrian), self.nl, self.time)
        info = {
            'time' : self.time,
            'action' : action,
            'natural light' : self.nl[self.time],
            'sl pedestrian count' : self.pedestrian_count(self.SLM_class.sls_cvrg_area, self.list_current_pedestrian),
            'curent pedestrian loc' : self.list_current_pedestrian
        }
        return list_pedestrian_detected, done, reward, info # info will be added later

    # def render(self):
    #     print(self.grid_pedestrian)


import itertools
from os import stat
import numpy as np

# variables
episodes = 100      # total number of episodes
alpha = 0.5         # learning rate
gamma = 0.9         # discount factor

class Agent():
    
    def __init__(self, n_rows, n_cols, sl_coverage, md_coverage, brightness_lvl):
        self.n_rows = n_rows
        self.n_cols = n_cols
        self.sl_coverage = sl_coverage
        self.md_coverage = md_coverage
        self.brightness_lvl = brightness_lvl
        self.SLM_class = StreetLightMaker(self.n_rows, self.n_cols, self.sl_coverage, self.md_coverage)
        self.state_space = 2 ** len(self.SLM_class.sl_locs)
        self.action_space = 3 ** len(self.SLM_class.sl_locs)
        self.qtable = np.zeros([self.state_space, self.action_space])
    
    @staticmethod
    def state_idx(state):
        state_bin = ''.join(map(str,state))
        state_idx = 0
        for idx in range(len(state_bin)):
            i = len(state_bin) - 1 - idx
            state_idx += int(state_bin[i]) * (2 ** idx)
        return state_idx

    @staticmethod
    def idx_state(state_idx, sl_locs):
        state = np.zeros(len(sl_locs))
        state_bin = list(bin(state_idx))
        del state_bin[0:2]
        state_bin = list(map(int, state_bin))
        for idx_bin in range(len(state_bin)):
            state_len = len(sl_locs)
            idx = state_len - 1 - idx_bin
            state[idx] = state_bin[idx_bin]
        return state

    @staticmethod
    def action_idx(action):
        action_copy = np.array(action)
        action_copy.fill(1)
        action_update = action - action_copy
        action_ter = ''.join(map(str, action_update))
        action_idx = 0
        for idx in range(len(action_ter)):
            i = len(action_ter) - 1 - idx
            action_idx += int(action_ter[i]) * (3 ** idx)
        return action_idx

    @staticmethod
    def idx_action(action_idx, sl_locs):
        action_ter = np.zeros(len(sl_locs))
        current_idx = action_idx
        for idx in range(len(action_ter)):
            reminder = current_idx % 3
            action_ter[len(action_ter) - 1 - idx] = reminder
            current_idx = current_idx // 3
        action_copy = np.array(action_ter)
        action_copy.fill(1)
        action = action_ter + action_copy
        return list(map(int, action))
        
    def learn(self, qtable, obs, action, alpha, gamma, reward, new_obs):
        state_idx = self.state_idx(obs)
        action_idx = self.action_idx(action)
        new_state_idx = self.state_idx(new_obs)
        old_value = qtable[state_idx, action_idx]
        qtable[state_idx, action_idx] = old_value + (alpha * (reward + (gamma * np.max(qtable[new_state_idx, :])) - old_value))
        return qtable

    def action(self, qtable, obs, sl_locs, brightness_lvl):
        state_idx = self.state_idx(obs)
        flat_qtable = np.ravel(qtable)
        if np.all(flat_qtable == flat_qtable[0]):
            action = []
            for idx_sl in range(len(sl_locs)):
                brightness = random.choice(brightness_lvl)
                action.append(brightness)
        else:
            action_idx = np.argmax(qtable[state_idx])
            action = self.idx_action(action_idx, sl_locs)
        return action


In [10]:
print('test for pedestrian maker')
pm = PedestriansMaker(n_rows, n_cols, positions, num_of_pedestrian, tds)
print('starting destination combination = {}'.format(pm.list_sd))
print('pedestrian starting destination combination = {}'.format(pm.pedestrians_sd))
print('pedestrian starting = {}'.format(pm.pedestrians_s))
print('pedestrian destination = {}'.format(pm.pedestrians_d))
print('pedestrian pathway = {}'.format(pm.pedestrians_pathway))
print('pedestrian time delay = {}'.format(pm.pedestrians_td))

print('test for street light')
slm = StreetLightMaker(n_rows, n_cols, sl_coverage, md_coverage)
print('list of street light coordinate = {}'.format(slm.sl_locs))
print('coverage coordinate for each light = {}'.format(slm.sls_cvrg_area))
print('motion detection coverage area = {}'.format(slm.mds_cvrg_area))

print('test for environment')
env = Environment(n_rows, n_cols, positions, num_of_pedestrian, tds, sl_coverage, md_coverage, nl_opt)
print('reset = {}'.format(env.reset()))
obs = env.reset()
action = agent.action(agent.qtable, obs, agent.SLM_class.sl_locs, brightness_lvl)
print('pedestrian starting pt : {}'.format(env.list_current_pedestrian))
print('pedestrian time delay : {}'.format(env.PM_class.pedestrians_td))
print('natural light time : {}'.format(env.nl))

print('test for agent')
agent = Agent(n_rows, n_cols, sl_coverage, md_coverage, brightness_lvl)
obs = env.reset() # observation required to test the agent
print('action = {}'.format(agent.action(agent.qtable, obs, agent.SLM_class.sl_locs, brightness_lvl)))  

test for pedestrian maker
starting destination combination = [((0, 0), (0, 11)), ((0, 11), (0, 0))]
pedestrian starting destination combination = [[(0, 11), (0, 0)], [(0, 0), (0, 11)]]
pedestrian starting = [(0, 11), (0, 0)]
pedestrian destination = [(0, 0), (0, 11)]
pedestrian pathway = [(0, -1), (0, 1)]
pedestrian time delay = [2, 1]
test for street light
list of street light coordinate = [(0, 3), (0, 8)]
coverage coordinate for each light = [[(0, 1), (0, 2), (0, 3), (0, 4), (0, 5)], [(0, 6), (0, 7), (0, 8), (0, 9), (0, 10)]]
motion detection coverage area = [[(0, 1), (0, 2), (0, 3), (0, 4), (0, 5)], [(0, 6), (0, 7), (0, 8), (0, 9), (0, 10)]]
test for environment
reset = [0, 0]
pedestrian starting pt : [(0, 11), (0, 0)]
pedestrian time delay : [2, 1]
natural light time : [1, 1, 1, 2, 2, 3, 3, 3, 3, 3, 2, 2, 1, 1]
test for agent
action = [3, 1]


In [11]:
agent = Agent(n_rows, n_cols, sl_coverage, md_coverage, brightness_lvl)
qtable = agent.qtable
print('final test')
obs = env.reset()
done = False
total_reward = 0
for i in range(episodes):
    # for i in range(2):
    while not done:
        action = agent.action(qtable, obs, agent.SLM_class.sl_locs, brightness_lvl)
        new_obs, done, reward, info = env.step(action)
        print('obs : {}'.format(new_obs))
        print('done : {}'.format(done))
        print('reward : {}'.format(reward))
        print('info : {}'.format(info))
        total_reward += reward
        print('total reward : {}'.format(total_reward))
        qtable = agent.learn(qtable, obs, action, alpha, gamma, reward, new_obs)
        print(qtable[agent.state_idx(new_obs)])
        obs = new_obs
print(qtable)

final test
[-2, -1]
obs : [0, 1]
done : False
reward : -3
info : {'time': 1, 'action': [3, 3], 'natural light': 1, 'sl pedestrian count': [1, 2], 'curent pedestrian loc': [(0, 10), (0, 10)]}
total reward : -3
[0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0, -1]
obs : [0, 1]
done : False
reward : -1
info : {'time': 2, 'action': [1, 1], 'natural light': 1, 'sl pedestrian count': [1, 2], 'curent pedestrian loc': [(0, 9), (0, 9)]}
total reward : -4
[-0.5  0.   0.   0.   0.   0.   0.   0.   0. ]
[-1, 0]
obs : [0, 1]
done : False
reward : -1
info : {'time': 3, 'action': [1, 2], 'natural light': 2, 'sl pedestrian count': [1, 2], 'curent pedestrian loc': [(0, 8), (0, 8)]}
total reward : -5
[-0.5 -0.5  0.   0.   0.   0.   0.   0.   0. ]
[-1, -1]
obs : [0, 1]
done : False
reward : -2
info : {'time': 4, 'action': [1, 3], 'natural light': 2, 'sl pedestrian count': [1, 2], 'curent pedestrian loc': [(0, 7), (0, 7)]}
total reward : -7
[-0.5 -0.5 -1.   0.   0.   0.   0.   0.   0. ]
[-2, -3]
obs : [0, 1]
done : False


[1, 3]
