In [10]:
from Covid19Environment import GraphicCovid19Environment
import numpy as np
import random
import enum
import math

In [3]:
@enum.unique
class States(enum.Enum):
    """Integer codes describing what a grid cell contains.

    The values double as indices into the Q-table, so they must stay
    contiguous and start at zero.
    """

    NOTHING = 0   # empty cell
    HOUSE = 1     # the house cell
    SOMEONE = 2   # a cell occupied by another person
    KIWI = 3      # the kiwi cell
    BORDER = 4    # a position outside the grid

@enum.unique
class Actions(enum.Enum):
    """Integer codes for the moves the agent can make.

    The values are used as indices into the last axis of the Q-table.
    """

    LEFT = 0      # turn left, stay on the same cell
    RIGHT = 1     # turn right, stay on the same cell
    FORWARD = 2   # step onto the cell currently being looked at

In [4]:
class MovableObj():
    """An object on the grid that has a position and a facing direction.

    The facing is kept as an index (``currentState``) into ``states``.
    Turning looks the next index up in ``directions``; moving forward adds
    the facing's unit vector to the position.
    """

    # Unit vectors (dx, dy) for the four facings.
    up = [0, 1]
    down = [0, -1]
    right = [1, 0]
    left = [-1, 0]

    def __init__(self, start_pos):
        """Create the object at ``start_pos`` with facing index 0 (``up``)."""
        self.current_pos = start_pos
        self.currentState = 0
        # Facing vectors in the order addressed by ``currentState``.
        self.states = [self.up, self.left, self.down, self.right]
        # directions[facing][turn] -> new facing; turn 0 is left, turn 1 is right.
        self.directions = [[1, 3], [2, 0], [3, 1], [0, 2]]
        # Not read by any method of this class.
        self.moves = [[[-1, -1], [1, -1]], [[1, -1], [1, 1]], [[1, 1], [-1, 1]], [[-1, 1],[-1, -1]]]

    def get_body_pos(self):
        """Return the position the object currently stands on."""
        return self.current_pos

    def get_vision_pos(self):
        """Return the ``(x, y)`` tuple of the cell directly in front."""
        dx, dy = self.states[self.currentState]
        here = self.current_pos
        return (here[0] + dx, here[1] + dy)

    def do_action(self, action_num):
        """Apply an action code: ``Actions.FORWARD`` moves, anything else turns."""
        if action_num != Actions.FORWARD.value:
            self.look_left(action_num)
            return
        self.move_forward()

    def move_forward(self):
        """Step one cell along the current facing."""
        # The destination is exactly the cell being looked at.
        self.current_pos = self.get_vision_pos()

    def restart(self):
        """Reset the facing to index 0; the position is left untouched."""
        # NOTE(review): only the facing is reset, not the position - confirm intended.
        self.currentState = 0

    def look_left(self, left = 0):
        """Turn in place; ``left`` selects the column of ``directions`` to use."""
        self.currentState = self.directions[self.currentState][left]

In [7]:
class Covid19Environment():
    """Grid world in which a survivor is trained with tabular Q-learning.

    A state is the pair ``(stand, see)``: the content code of the cell the
    survivor stands on and of the cell directly in front of it. The Q-table
    is indexed ``q_table[stand][see][action]`` and is updated in place, so
    the same table can be handed to a fresh environment for every episode.
    """

    # Class-level defaults. The first assignment through ``self`` creates a
    # per-instance value, so every new environment starts from these.
    have_been_to_kiwi = False
    total_actions = 0
    random_actions = 0

    def __init__(self, q_table, world_size = (5, 5), learning_rate = 0.7, discount_rate = 0.618):
        """Build the grid and put the survivor on its centre cell.

        Args:
            q_table: Table indexed ``[stand][see][action]``; mutated by ``step``.
            world_size: ``(rows, cols)`` of the grid.
            learning_rate: Step size of the Q update.
            discount_rate: Discount applied to the best next-state value.
        """
        self.learning_rate = learning_rate
        self.discount_rate = discount_rate

        self.world_size = world_size
        self.environment_table = np.zeros(world_size, dtype = int)
        # House in the first corner, kiwi in the opposite one.
        self.environment_table[0][0] = States.HOUSE.value
        self.environment_table[world_size[0]-1][world_size[1]-1] = States.KIWI.value
        self.q_table = q_table
        start_pos = [world_size[0] // 2, world_size[1] // 2]
        self.survivor = MovableObj(start_pos)

    def is_within_bounds(self, pos):
        """Return True when ``pos`` addresses a cell inside the grid."""
        return 0 <= pos[0] < self.world_size[0] and 0 <= pos[1] < self.world_size[1]

    def _cell_state(self, pos):
        """Return the content code at ``pos``, or BORDER when ``pos`` is off-grid."""
        if not self.is_within_bounds(pos):
            return States.BORDER.value
        return self.environment_table[pos[0]][pos[1]]

    def get_stand_state(self, stand_pos):
        """Return the content code of the cell the survivor stands on."""
        return self._cell_state(stand_pos)

    def get_see_state(self, see_pos):
        """Return the content code of the cell the survivor looks at."""
        return self._cell_state(see_pos)

    def get_state(self, stand_pos, see_pos):
        """Return the ``(stand, see)`` state pair for the two positions."""
        return self.get_stand_state(stand_pos), self.get_see_state(see_pos)

    def get_possible_actions_from_state(self, state):
        """Return the action indices whose Q-value is not ``-inf``.

        The number of actions is taken from the Q-table row itself rather
        than being fixed, so tables with more actions work unchanged.
        """
        action_values = self.q_table[state[0]][state[1]]
        return [action for action, value in enumerate(action_values) if value != -math.inf]

    def get_max_value_from_state(self, state):
        """Return the largest Q-value over all actions in ``state``."""
        return max(self.q_table[state[0]][state[1]])

    def calculate_reward(self, state):
        """Return ``(reward, done)`` for arriving in ``state``.

        Only the stand component ``state[0]`` is inspected. The first visit
        to the kiwi sets ``have_been_to_kiwi``, which raises the reward for
        reaching the house afterwards.

        Raises:
            ValueError: If ``state[0]`` is not a known ``States`` value.
        """
        stand = state[0]
        if stand == States.NOTHING.value:
            return -1, False
        if stand == States.SOMEONE.value:
            return -10, False
        if stand == States.KIWI.value:
            if self.have_been_to_kiwi:
                return 0, False
            self.have_been_to_kiwi = True
            return 30, False
        if stand == States.HOUSE.value:
            if self.have_been_to_kiwi:
                print("Wooow! House reached after Kiwi!")
                return 100, True
            print("House reached")
            return 30, True
        if stand == States.BORDER.value:
            print("Out of the border.")
            return -1000, True
        # Fail loudly instead of returning None, which callers cannot unpack.
        raise ValueError("Something is wrong: {}".format(state))

    def step(self, exploration_rate = None):
        """Choose one action, apply it and update the Q-table.

        Args:
            exploration_rate: Probability of taking a random action. When
                omitted, the module-level ``epsilon`` is used, as before.

        Returns:
            ``(done, reward)`` for the transition that was just taken.
        """
        eps = epsilon if exploration_rate is None else exploration_rate

        current_state = self.get_state(self.survivor.get_body_pos(), self.survivor.get_vision_pos())
        possible_actions = self.get_possible_actions_from_state(current_state)

        # Epsilon-greedy: exploit the best known action, otherwise explore.
        if random.uniform(0, 1) > eps:
            action_to_choose = np.argmax(self.q_table[current_state[0]][current_state[1]])
        else:
            action_to_choose = random.choice(possible_actions)
            self.random_actions += 1

        self.survivor.do_action(action_to_choose)

        new_state = self.get_state(self.survivor.get_body_pos(), self.survivor.get_vision_pos())
        reward, done = self.calculate_reward(new_state)

        # A terminal transition has no future, so nothing is bootstrapped from it.
        future_value = 0 if done else self.get_max_value_from_state(new_state)
        current_Q_value = self.q_table[current_state[0]][current_state[1]][action_to_choose]

        # Q <- Q + lr * (reward + discount * max_a' Q(s', a') - Q)
        new_Q_value = current_Q_value + self.learning_rate * (reward + self.discount_rate * future_value - current_Q_value)

        self.q_table[current_state[0]][current_state[1]][action_to_choose] = new_Q_value
        self.total_actions += 1
        return done, reward

In [8]:
# Parameters
epsilon = 1.0                 # Exploration rate (read as a global by Covid19Environment.step)
max_epsilon = 1.0             # Exploration probability at start
min_epsilon = 0.01            # Minimum exploration probability
decay_rate = 0.01             # Exponential decay rate for exploration prob

learning_rate = 0.7
discount_rate = 0.618

steps = 100                   # Maximum number of steps per episode
total_episodes = 300

# Init Q table, indexed [stand state][see state][action]; sized from the enums
# so it cannot drift out of sync with them.
q_table = np.zeros((len(States), len(States), len(Actions)))
# If you see border, then you should not go forward
q_table[:, States.BORDER.value, Actions.FORWARD.value] = -math.inf

for episode in range(total_episodes):
    done = False
    total_reward = 0
    # A fresh environment per episode; the Q-table is shared and keeps learning.
    # The hyper-parameters above are passed explicitly so that editing them
    # actually changes the training.
    env = Covid19Environment(q_table, learning_rate=learning_rate, discount_rate=discount_rate)
    for s in range(steps):
        done, new_reward = env.step()
        total_reward += new_reward
        # If done : finish episode
        if done:
            break
    print(episode, " Finished: ", s, ", reward: ", total_reward, "Random/total actions: ", (env.random_actions*100)/env.total_actions)
    # Reduce epsilon (because we need less and less exploration)
    epsilon = min_epsilon + (max_epsilon - min_epsilon)*np.exp(-decay_rate*episode)

np.set_printoptions(precision=2, suppress=True)
print("\n\nQ Table:\n", q_table)

House reached
0  Finished:  27 , reward:  3 Random/total actions:  100.0
1  Finished:  99 , reward:  -100 Random/total actions:  100.0
Wooow! House reached after Kiwi!
2  Finished:  61 , reward:  74 Random/total actions:  100.0
House reached
3  Finished:  36 , reward:  -6 Random/total actions:  100.0
House reached
4  Finished:  82 , reward:  -52 Random/total actions:  97.59036144578313
House reached
5  Finished:  14 , reward:  16 Random/total actions:  100.0
6  Finished:  99 , reward:  -100 Random/total actions:  94.0
7  Finished:  99 , reward:  -100 Random/total actions:  89.0
8  Finished:  99 , reward:  -100 Random/total actions:  95.0
9  Finished:  99 , reward:  -30 Random/total actions:  92.0
House reached
10  Finished:  14 , reward:  16 Random/total actions:  93.33333333333333
House reached
11  Finished:  60 , reward:  -30 Random/total actions:  90.1639344262295
House reached
12  Finished:  88 , reward:  -58 Random/total actions:  91.01123595505618
13  Finished:  99 , reward:  -55

171  Finished:  61 , reward:  98 Random/total actions:  19.35483870967742
House reached
172  Finished:  51 , reward:  -21 Random/total actions:  21.153846153846153
House reached
173  Finished:  9 , reward:  21 Random/total actions:  10.0
Wooow! House reached after Kiwi!
174  Finished:  17 , reward:  116 Random/total actions:  16.666666666666668
Wooow! House reached after Kiwi!
175  Finished:  60 , reward:  73 Random/total actions:  26.229508196721312
House reached
176  Finished:  11 , reward:  19 Random/total actions:  16.666666666666668
House reached
177  Finished:  13 , reward:  17 Random/total actions:  14.285714285714286
House reached
178  Finished:  43 , reward:  -13 Random/total actions:  25.0
Wooow! House reached after Kiwi!
179  Finished:  73 , reward:  74 Random/total actions:  13.513513513513514
Wooow! House reached after Kiwi!
180  Finished:  62 , reward:  89 Random/total actions:  22.22222222222222
181  Finished:  99 , reward:  8 Random/total actions:  20.0
House reached
18

287  Finished:  54 , reward:  -24 Random/total actions:  9.090909090909092
House reached
288  Finished:  9 , reward:  21 Random/total actions:  0.0
House reached
289  Finished:  9 , reward:  21 Random/total actions:  10.0
House reached
290  Finished:  9 , reward:  21 Random/total actions:  0.0
House reached
291  Finished:  9 , reward:  21 Random/total actions:  0.0
Wooow! House reached after Kiwi!
292  Finished:  88 , reward:  57 Random/total actions:  10.112359550561798
293  Finished:  99 , reward:  15 Random/total actions:  6.0
House reached
294  Finished:  78 , reward:  -48 Random/total actions:  3.7974683544303796
295  Finished:  99 , reward:  26 Random/total actions:  8.0
Wooow! House reached after Kiwi!
296  Finished:  20 , reward:  118 Random/total actions:  4.761904761904762
Wooow! House reached after Kiwi!
297  Finished:  14 , reward:  118 Random/total actions:  6.666666666666667
Wooow! House reached after Kiwi!
298  Finished:  48 , reward:  84 Random/total actions:  4.0816326

In [9]:
# Graphical representation of the game
# NOTE(review): this rebinds `env`, which the training cell used for a
# Covid19Environment instance.
# NOTE(review): the recorded output of this cell ends with SystemExit -
# presumably raised when the viewer is closed; confirm against
# GraphicCovid19Environment.
env = GraphicCovid19Environment()

PlottingGrid
FinishedPlotting
add_house
add_house
add_kiwi
add_kiwi
add_survivor
add_survivor
-1
State:  [3, 3]
[<States.NOTHING: 3>, <States.NOTHING: 3>]
-2
State:  [3, 3]
[<States.NOTHING: 3>, <States.NOTHING: 3>]
-3
State:  [3, 3]
[<States.NOTHING: 3>, <States.NOTHING: 3>]
-4
State:  [3, 3]
[<States.NOTHING: 3>, <States.NOTHING: 3>]
-5
State:  [3, 3]
[<States.NOTHING: 3>, <States.NOTHING: 3>]
-6
State:  [3, 3]
[<States.NOTHING: 3>, <States.NOTHING: 3>]
-7
State:  [3, 3]
[<States.NOTHING: 3>, <States.NOTHING: 3>]
-8
State:  [3, 3]
[<States.NOTHING: 3>, <States.NOTHING: 3>]
-9
State:  [3, 3]
[<States.NOTHING: 3>, <States.NOTHING: 3>]
-10
State:  [3, 3]
[<States.NOTHING: 3>, <States.NOTHING: 3>]
-11
State:  [3, 3]
[<States.NOTHING: 3>, <States.NOTHING: 3>]
-12
State:  [3, 3]
[<States.NOTHING: 3>, <States.NOTHING: 3>]
-13
State:  [3, 3]
[<States.NOTHING: 3>, <States.NOTHING: 3>]
-14
State:  [3, 3]
[<States.NOTHING: 3>, <States.NOTHING: 3>]
-15
State:  [3, 3]
[<States.NOTHING: 3>, <State

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
