In [1]:
import numpy as np
import random
from graphics import *
import math
import traceback
import sys
import enum 

from MovableObj import MovableObj

In [2]:
class States(enum.Enum): 
    NOTHING = 0
    HOUSE = 1
    SOMEONE = 2
    KIWI = 3
    BORDER = 4

class Actions(enum.Enum): 
    LEFT = 0
    RIGHT = 1
    FORWARD = 2

In [56]:
 class Covid19Environment():
    
    have_been_to_kiwi = False
    total_actions = 0
    random_actions = 0

    def __init__(self, q_table, world_size = (5, 5), learning_rate = 0.7, discount_rate = 0.618):
        self.learning_rate = learning_rate
        self.discount_rate = discount_rate

        self.world_size = world_size
        self.environment_table = np.zeros(world_size, dtype = int)
        self.environment_table[0][0] = States.HOUSE.value
        self.environment_table[world_size[0]-1][world_size[1]-1] = States.KIWI.value
        self.q_table = q_table
        start_pos = [math.floor(world_size[0]/2), math.floor(world_size[1]/2)]
        self.survivor = MovableObj(start_pos)
        #print("vision: ", self.survivor.get_vision_pos())
        #print("direction: ", self.survivor.states[survivor.currentState]) 

    def is_within_bounds(self, pos):
        return pos[0] < self.world_size[0] and pos[1] < self.world_size[1] and pos[0] >= 0 and pos[1] >= 0

    def get_stand_state(self, stand_pos):
        if not self.is_within_bounds(stand_pos):
            return 4
        return self.environment_table[stand_pos[0]][stand_pos[1]]

    def get_see_state(self, see_pos):
        if not self.is_within_bounds(see_pos):
            return 4
        return self.environment_table[see_pos[0]][see_pos[1]]

    def get_state(self, stand_pos, see_pos):
        return self.get_stand_state(stand_pos), self.get_see_state(see_pos)

    def get_possible_actions_from_state(self, state):
        possible_actions = []
        for i in range(3):
            if(self.q_table[state[0]][state[1]][i] != -math.inf):
                possible_actions.append(i)
        return possible_actions

    def get_max_value_from_state(self, state):
        return max(self.q_table[state[0]][state[1]])

    def calculate_reward(self, state):
        if state[0] == States.NOTHING.value:
            return -1, False
        elif state[0] == States.SOMEONE.value:
            return -10, False
        elif state[0] == States.KIWI.value:
            if self.have_been_to_kiwi:
                return 0, False
            else:
                self.have_been_to_kiwi = True
                return 30, False
        elif state[0] == States.HOUSE.value:
            if self.have_been_to_kiwi:
                print("Wooow! House reached after Kiwi!")
                return 100, True
            else:
                print("House reached")
                return 30, True
            
            return 10, True
        elif state[0] == States.BORDER.value:
            print("Out of the border.")
            return -1000, True
        else:
            print("Something is wrong: ", state)

    def step(self):
        current_state = self.get_state(self.survivor.get_body_pos(), self.survivor.get_vision_pos())
        #print("current_state: ", current_state)
        possible_actions = self.get_possible_actions_from_state(current_state)
        #print("possible_actions: ", possible_actions)

        exp_exp_tradeoff = random.uniform(0,1)
        if exp_exp_tradeoff > epsilon:
            action_to_choose = np.argmax(self.q_table[current_state[0],current_state[1],:])
        else:
            action_to_choose = random.choice(possible_actions)
            self.random_actions += 1
        
        self.survivor.do_action(action_to_choose)

        new_state = get_state(self.survivor.get_body_pos(), self.survivor.get_vision_pos())
        #print("new_pos: ", survivor.get_body_pos(), survivor.get_vision_pos())
        #print("new_state: ", States(new_state[0]), States(new_state[1]))

        highest_Q_value = self.get_max_value_from_state(new_state)
        current_Q_value = self.q_table[current_state[0]][current_state[1]][action_to_choose]
        reward, done = self.calculate_reward(new_state)

        # New Q value =    Current Q value +    lr * [Reward + discount_rate * (highest Q value between possible actions from the new state s’ ) — Current Q value ]
        new_Q_value = current_Q_value + self.learning_rate * (reward + self.discount_rate * highest_Q_value - current_Q_value)

        self.q_table[current_state[0]][current_state[1]][action_to_choose] = new_Q_value
        self.total_actions += 1
        return done, reward
        #print("new_Q_value: ", new_Q_value)


In [57]:
# Parameters
epsilon = 1.0                 # Exploration rate
max_epsilon = 1.0             # Exploration probability at start
min_epsilon = 0.01            # Minimum exploration probability 
decay_rate = 0.01             # Exponential decay rate for exploration prob

learning_rate = 0.7
discount_rate = 0.618

steps = 100
total_episodes = 300

#world_size = (5, 5)

# Init Q table
q_table = np.zeros((5, 5, 3))
# If you see border, then you should not go forward
for i in range(5):
    q_table[i][States.BORDER.value][Actions.FORWARD.value] = -math.inf
#print(q_table)

for episode in range(total_episodes):
    done = False
    total_reward = 0
    env = Covid19Environment(q_table)
    for s in range(steps):
        done, new_reward = env.step()
        total_reward += new_reward
        # If done : finish episode
        if done == True:
            break
    print(episode, " Finished: ", s, ", reward: ", total_reward, "Random/total actions: ", (env.random_actions*100)/env.total_actions)
        # Reduce epsilon (because we need less and less exploration)
    epsilon = min_epsilon + (max_epsilon - min_epsilon)*np.exp(-decay_rate*episode)

print("\n\nQ Table:\n", q_table)

reached after Kiwi!
85  Finished:  39 , reward:  95 Random/total actions:  55.0
Wooow! House reached after Kiwi!
86  Finished:  64 , reward:  82 Random/total actions:  43.07692307692308
Wooow! House reached after Kiwi!
87  Finished:  34 , reward:  100 Random/total actions:  34.285714285714285
Wooow! House reached after Kiwi!
88  Finished:  45 , reward:  90 Random/total actions:  32.608695652173914
89  Finished:  99 , reward:  -51 Random/total actions:  38.0
House reached
90  Finished:  98 , reward:  -68 Random/total actions:  40.4040404040404
House reached
91  Finished:  31 , reward:  -1 Random/total actions:  40.625
House reached
92  Finished:  67 , reward:  -37 Random/total actions:  39.705882352941174
House reached
93  Finished:  13 , reward:  17 Random/total actions:  35.714285714285715
House reached
94  Finished:  29 , reward:  1 Random/total actions:  56.666666666666664
95  Finished:  99 , reward:  -57 Random/total actions:  50.0
Wooow! House reached after Kiwi!
96  Finished:  69

In [2]:
print(player.getCenter())
print(vision.getCenter())
playerPos = [math.floor(player.getCenter().getX()), math.floor(player.getCenter().getY())]
visionPos = [math.floor(vision.getCenter().getX()), math.floor(vision.getCenter().getY())]
left = [-1, 0]
print(playerPos, visionPos)

xDestination = playerPos[0] + left[0]
xPos = xDestination - visionPos[0]
yDestination = playerPos[1] + left[1]
yPos = yDestination - visionPos[1]
newPos = [xPos, yPos]
print(xDestination, yDestination, newPos)


NameError: name 'player' is not defined

In [3]:
class Movablevision():
    up = [0, 1]
    down = [0, -1]
    right = [1, 0] 
    left = [-1, 0]
    def __init__(self, window):
        self.win = window
        self.body = Rectangle(Point(5.3,5.3), Point(5.8,5.8)) # size 0.5
        self.body.draw(self.win)
        self.body.setFill("white")
        self.vision = Rectangle(Point(5,6), Point(6,7))
        self.vision.draw(self.win)
        self.vision.setFill("blue1")
        
        self.directions = [[1, 3], [2, 0], [3, 1], [0, 2]]
        self.moves = [[[-1, -1], [1, -1]], [[1, -1], [1, 1]], [[1, 1], [-1, 1]], [[-1, 1],[-1, -1]]]
        self.states = [self.up, self.left, self.down, self.right]
        self.currentState = 0
        
    def get_body_pos(self):
        return [math.floor(self.body.getCenter().getX()), math.floor(self.body.getCenter().getY())]
    
    def get_vision_pos(self):
        return [math.floor(self.vision.getCenter().getX()), math.floor(self.vision.getCenter().getY())]
    
    def move_forward(self):
        direction = self.states[self.currentState]
        self.vision.move(direction[0], direction[1])
        self.body.move(direction[0], direction[1])
    
    def restart(self):
        self.vision.undraw()
        self.vision = Rectangle(Point(5,6), Point(6,7))
        self.vision.draw(self.win)
        self.vision.setFill("blue1")
        self.body.undraw()
        self.body = Rectangle(Point(5.3,5.3), Point(5.8,5.8))
        self.body.draw(self.win)
        self.body.setFill("white")
        self.currentState = 0
    
    def look_left(self, left = True):
        nextState = self.directions[self.currentState][0 if left else 1]
        
        #print("\nCurrentState: ", self.currentState, "left: ", left)
        #print("nextState: ", nextState)
        
        playerPos = self.get_body_pos()
        visionPos = self.get_vision_pos()

        xPos = self.moves[self.currentState][0 if left else 1][0] #xDestination - visionPos[0]

        yPos = self.moves[self.currentState][0 if left else 1][1] #yDestination - visionPos[1]
        #print(xPos, yPos)
        #print("newPos", xPos, yPos)
        self.vision.move(xPos, yPos)
        self.currentState = nextState
        #print(self.vision.getP1(), self.vision.getP1())
        #print("newState: ", self.currentState)

In [4]:
class CoronaEnvironment():
    def __init__(self):
        try:
            self.win = GraphWin('Floor', 500, 500)
            self.win.setCoords(0.0, 0.0, 10.0, 10.0)
            self.win.setBackground("blue4")

            self.draw_grid()
            self.add_house()
            self.add_survivor()
            self.add_kiwi()
            
            self.main()   
            self.win.close()
        except Exception:
            print("Ooops, something is wrong")
            print(traceback.format_exc())
            if self.win != None:    
                self.win.close()
        
    def main(self):
        while True:
            k = self.win.checkKey()

            if k == 'Left':
                self.survivor.look_left()
            elif k == 'Right':
                self.survivor.look_left(False)
            elif k == 'Up':
                self.survivor.move_forward()
            elif k == 'Down':
                self.survivor.restart()
            elif k == 'period':
                break      
        
        
    def draw_grid(self):
        print("PlottingGrid")
        for x in range(10):
            for y in range(10):
                self.win.plotPixel(x*50, y*50, "yellow")
        print("FinishedPlotting")
    
    def add_house(self):
        print("add_house")
        
        self.housePos = [0, 0]
        house = Rectangle(Point(0,0), Point(1,1))
        house.draw(self.win)
        house.setFill("brown")
        print("add_house")
        
    
    def add_survivor(self):
        print("add_survivor")
        self.survivor = Movablevision(self.win)
        print("add_survivor")
        
        
    def add_kiwi(self):
        print("add_kiwi")
        
        self.kiwiPos = [9, 9]
        kiwi = Rectangle(Point(9,9), Point(10, 10))
        kiwi.draw(self.win)
        kiwi.setFill("green")
        print("add_kiwi")
    
    def reset_environment():
        self.survivor.reset()
    
    def get_state():
        vision_pos = self.survivor
    

In [5]:
CoronaEnvironment()

PlottingGrid
FinishedPlotting
add_house
add_house
add_survivor
add_survivor
add_kiwi
add_kiwi
Ooops, something is wrong
Traceback (most recent call last):
  File "<ipython-input-4-4b29e1717d5f>", line 13, in __init__
    self.main()
  File "<ipython-input-4-4b29e1717d5f>", line 23, in main
    k = self.win.checkKey()
  File "C:\Users\Maria\Anaconda3\lib\site-packages\graphics\__init__.py", line 352, in checkKey
    raise GraphicsError("checkKey in closed window")
graphics.GraphicsError: checkKey in closed window



<__main__.CoronaEnvironment at 0x28b1a3f6988>

In [6]:
win = GraphWin('Floor', 500, 500)

win.setCoords(0.0, 0.0, 10.0, 10.0)
win.setBackground("blue4")

# draw grid
for x in range(10):
    for y in range(10):
        win.plotPixel(x*50, y*50, "yellow")

house = Rectangle(Point(0,0), Point(1,1)) # size 0.5
house.draw(win)
house.setFill("brown")

player = Rectangle(Point(5.3,5.3), Point(5.8,5.8)) # size 0.5
player.draw(win)
player.setFill("white")
vision = Rectangle(Point(5,6), Point(6,7)) # size 0.5
vision.draw(win)
vision.setFill("blue1")
kiwi = Rectangle(Point(9,9), Point(10, 10)) # size 0.5
kiwi.draw(win)
kiwi.setFill("green")



win.getMouse()
win.close()

GraphicsError: getMouse in closed window

### Q Table
- See_states: null, someone, kiwi, house (4)
- Stand_states: null, someone, kiwi, house (4)
- Actions: left, right, forward (3)

### Rewards:
- Taking 1 action: -1 (faster is better)
- Standing on Kiwi: +10
- Standing with person: -5
- Getting home: +5 (terminates the game or after 100 actions)

In [None]:
Q_table = np.zeros((4, 4, 3))
print(Q_table)

In [None]:
total_episodes = 50        # Total episodes
total_test_episodes = 1     # Total test episodes
max_steps = 99                # Max steps per episode

learning_rate = 0.7           # Learning rate
gamma = 0.618                 # Discounting rate

# Exploration parameters
epsilon = 1.0                 # Exploration rate
max_epsilon = 1.0             # Exploration probability at start
min_epsilon = 0.01            # Minimum exploration probability 
decay_rate = 0.01             # Exponential decay rate for exploration prob

In [None]:
for episode in range(total_episodes):
    # Reset the environment
    state = env.reset()
    step = 0
    done = False
    
    for step in range(max_steps):
        # 3. Choose an action a in the current world state (s)
        ## First we randomize a number
        exp_exp_tradeoff = random.uniform(0,1)
        
        ## If this number > greater than epsilon --> exploitation (taking the biggest Q value for this state)
        if exp_exp_tradeoff > epsilon:
            action = np.argmax(qtable[state,:])
        
        # Else doing a random choice --> exploration
        else:
            action = env.action_space.sample()
        
        # Take the action (a) and observe the outcome state(s') and reward (r)
        new_state, reward, done, info = env.step(action)

        # Update Q(s,a):= Q(s,a) + lr [R(s,a) + gamma * max Q(s',a') - Q(s,a)]
        qtable[state, action] = qtable[state, action] + learning_rate * (reward + gamma * 
                                    np.max(qtable[new_state, :]) - qtable[state, action])
                
        # Our new state is state
        state = new_state
        
        # If done : finish episode
        if done == True: 
            break
    
    # Reduce epsilon (because we need less and less exploration)
    epsilon = min_epsilon + (max_epsilon - min_epsilon)*np.exp(-decay_rate*episode) 