# Random Walk

## Monte Carlo Simulation
- Agent: Moving agent takes random walks in the environment. 
- Actions: N, S, E, W, NE, NW, SE, SW -> small or normal steps
- State: State the agent is in -> taking an action in state -> the next state the agent will end up in.
- Environment: Collection of states -> agent takes actions -> observations and rewards. 
- Episodes: 10,000 simulations

In [21]:
import random

In [22]:
class Agent:
    
    def __init__(self):
        self.actions = {"N": (-1, 0), "S": (1, 0), "E": (0, 1), "W": (0, -1), \
                        "NE": (-1, 1), "SE": (1, 1), "NW": (-1, -1), "SW": (1, -1)}
        
        self.small_actions = {"N": (-0.1, 0.0), "S": (0.1, 0.0), "E": (0.0, 0.1), "W": (0.0, -0.1), \
                              "NE": (-0.1, 0.1), "SE": (0.1, 0.1), "NW": (-0.1, -0.1), "SW": (0.1, -0.1)}
        
    def step(self):
        """
        Return a random step
        """
        
        return random.choice(list(self.actions.values()))
        
    def small_step(self):
        """
        Return a random small step
        """
        
        return random.choice(list(self.small_actions.values()))

    def move(self, path):
        """
        Return a big or small step based on state's path size that agent encounters
        """
        
        # check path size for small step
        if path >= 0 and path <= 0.5:
            return self.small_step()
        
        # return normal step
        return self.step()

In [23]:
class State:
    
    def __init__(self, x = 0, y = 0):
        self.x = x
        self.y = y
        self.path = random.uniform(-1, 3)

    def action(self, move):
        """
        Return a new location after agent's action (dx and dy)
        """
        
        # Agent's action
        dx, dy = move
        
        return State(self.x + dx, self.y + dy)
    
    def get_location(self):
        """
        Return the agent's current (x, y) coordinates
        """
        
        return (self.x, self.y)
    
    def get_x(self):
        """
        Return x coordinate
        """
        
        return self.x
    
    def get_y(self):
        """
        Return y coordinate
        """
        
        return self.y
    
    def get_path(self):
        """
        Return the size of the path from current state to next state
        """
        
        return self.path
    
    def distance(self, next_state):
        """
        Return distance to any state: utilizing Euclidean Distance
        """
        
        return ((self.x - next_state.get_x())**2 + (self.y - next_state.get_y())**2)**0.5
    
    def start_distance(self):
        """
        Return distance from starting point
        """
        
        return abs(self.x) + abs(self.y)
    
    def __repr__(self):
        return f"{self.x}, {self.y}"

In [24]:
class Environment:
    
    def __init__(self):
        self.agent = Agent()
        self.start_state = self.current_state = State()
        self.exploration_state = State(x = random.randint(0, 31), y = random.randint(0, 31))        
        self.goal_state = State(x = random.randint(0, 31), y = random.randint(0, 31))
        self.V = {}
        self.locations = [self.start_state]
        self.consequence = -1
        self.reward_goal = 1
    
    def seen(self, state):
        """
        Add's to the Agent's trajectory
        """
        
        # Add current state to agent's history
        self.locations.append(state)
        
    def rewards(self):
        """
        Return reward of 1 or consequence of -1
        """
        
        # Return reward if goal state reached
        if self.current_state == self.goal_state:
            return self.reward_goal
        
        # Consequence for not reaching goal state
        return self.consequence
        
    def move(self, steps):
        """
        Return tuple (start distance, goal distance, state, action, reward
        """
        
        # Keep track of state
        state = self.current_state
        
        # Simulate walk based on number of steps
        for _ in range(steps):
            
            # Take walk
            walk = self.agent.move(self.current_state.get_path())
            
            # Get new state
            self.current_state = state.action(walk)
            
            # Add state to seen locations
            self.seen(self.current_state)
        
        return self.current_state.start_distance(), self.current_state.distance(self.goal_state), state, walk, self.rewards()
    
    def simulation_test(self, miles, simulations):
        """
        Return distances after basic Monte Carlo simulation
        """
        
        # Environment makes up an n miles of walkable terrain
        for mile in range(1, miles+1):
            
            # Number of walks that agent doesn't need rescue
            no_rescue = 0
            
            # returns
            returns = []
            
            # Loop for each episode
            for runs in range(simulations):
            
                # Generate and episode (St, At, Rt+1) and distances from start and goal
                start_distance, goal_distance, state, action, reward = self.move(mile)
                
                # Check if rescue is not needed 
                if state.start_distance() <= 4:
                    
                    # Increase safety count
                    no_rescue += 1
                
                # Add St, At, Rt+1 to episode trajectory
                returns.append((state, action, reward))
                
            # All distances of states from start and goal
            safety_percentage = (no_rescue / simulations)*100
            
            # V(St) <-- average(Returns(St))
            self.V[self.current_state] = safety_percentage
            
            # Print simulations
            print(f"Miles walked: {mile}  safety: {safety_percentage}%")
            
        # return V
        return self.V
            

In [25]:
if __name__ == "__main__":
    e = Environment()
    
    print(e.simulation_test(30, 20000))

Miles walked: 1  safety: 0.7100000000000001%
Miles walked: 2  safety: 0.0%
Miles walked: 3  safety: 0.0%
Miles walked: 4  safety: 0.0%
Miles walked: 5  safety: 0.0%
Miles walked: 6  safety: 0.0%
Miles walked: 7  safety: 0.0%
Miles walked: 8  safety: 0.0%
Miles walked: 9  safety: 0.0%
Miles walked: 10  safety: 0.0%
Miles walked: 11  safety: 0.0%
Miles walked: 12  safety: 0.0%
Miles walked: 13  safety: 0.0%
Miles walked: 14  safety: 0.0%
Miles walked: 15  safety: 0.0%
Miles walked: 16  safety: 0.0%
Miles walked: 17  safety: 0.0%
Miles walked: 18  safety: 0.0%
Miles walked: 19  safety: 0.0%
Miles walked: 20  safety: 0.0%
Miles walked: 21  safety: 0.0%
Miles walked: 22  safety: 0.0%
Miles walked: 23  safety: 0.0%
Miles walked: 24  safety: 0.0%
Miles walked: 25  safety: 0.0%
Miles walked: 26  safety: 0.0%
Miles walked: 27  safety: 0.0%
Miles walked: 28  safety: 0.0%
Miles walked: 29  safety: 0.0%
Miles walked: 30  safety: 0.0%
{-69.39999999999992, 63.20000000000002: 0.7100000000000001, -293