# Frozen Lake Environment

In [8]:
import time
import gymnasium as gym

env = gym.make("FrozenLake-v1", is_slippery=False, render_mode="human")



In [9]:

# Environment: Frozen Lake
# Documentation Link: https://gymnasium.farama.org/environments/toy_text/frozen_lake/
#
# Goal: crossing a frozen lake from start to goal without falling into any holes by walking over the frozen lake for a given episode
#
# Agent: Elf
#
# Actions Discrete(4):
# The action shape is (1,) in the range {0, 3} indicating which direction to move the player.
# 0: Move left
# 1: Move down
# 2: Move right
# 3: Move up
#
# State:
# The episode starts with the player in state [0] (location [0, 0]).
# The game starts with the player at location [0,0] of the frozen lake grid world with the goal located at far extent of the world e.g. [3,3] for the 4x4 environment.
# The observation is a value representing the player’s current position as current_row * ncols + current_col (where both the row and col start at 0).
# For example, the goal position in the 4x4 map can be calculated as follows: 3 * 4 + 3 = 15. The number of possible observations is dependent on the size of the map.
# The observation is returned as an int().
#
# Rewards
# Reach goal: +1
# Reach hole: 0
# Reach frozen: 0
#
# Episode End Conditions
# The episode ends if the following happens:
# Termination:
# The player moves into a hole.
# The player reaches the goal at max(nrow) * max(ncol) - 1 (location [max(nrow)-1, max(ncol)-1]).
# Truncation (when using the time_limit wrapper):
# The length of the episode is 100 for 4x4 environment, 200 for FrozenLake8x8-v1 environment.

In [10]:
env.reset()

(0, {'prob': 1})

In [11]:
observation,info = env.reset() 
observation

0

In [12]:
rand_action = env.action_space.sample()
rand_action
# 0: Move left
# 1: Move down
# 2: Move right
# 3: Move up

np.int64(0)

# Policy Functions

In [13]:
import random
import numpy as np


# Exploration Algorithm
def explorationPolicyFunction(env):
    # validActions = [0,1,2,3]
    #                 # 0: Move left
    #                 # 1: Move down
    #                 # 2: Move right
    #                 # 3: Move up    
    # action = random.choice(validActions)
    action = env.action_space.sample()  # Random action
    return action

#Exploitation Strategy
def exploitationPolicyFunction(state):


    # validActions = [0,1,2,3]
    #                 # 0: Move left
    #                 # 1: Move down
    #                 # 2: Move right
    #                 # 3: Move up    

    if state == 0:
        action = 2
    elif state == 1:
        action = 2
    elif state == 2:
        action = 1
    elif state == 3:
        action = 0
    elif state == 4:
        action = 1
    # elif state == 5:
    #     action = 1
    elif state == 6:
        action = 1
    # elif state == 7:
    #     action = 0
    elif state == 8:
        action = 2
    elif state == 9:
        action = 2
    elif state == 10:
        action = 1
    elif state == 11:
        action = 1
    elif state == 12:
        action = 2
    elif state == 13:
        action = 2
    elif state == 14:
        action = 2
    else: # state 15
        action = 2
                                                                           

    return action


# Epsilon Greedy Algorithm
def epsilonGreedy(env,currentState,epsilon):

    p = np.random.randn() # 0 to 1 values

    if p < epsilon:
        #Exploration Strategy
        action = explorationPolicyFunction(env)

    else:
        #Exploitation Strategy
        action = exploitationPolicyFunction(currentState)

    return action


In [15]:
episodes = 10
max_steps = 100

EPSILON = 0.5

# strategy = "exploration"
# strategy = "exploitation"
strategy = "eg"

for episode in range(episodes):
    
    env = gym.make("FrozenLake-v1", is_slippery=False, render_mode="human")

    # observation,info = env.reset()        
    state, _ = env.reset()
    # The observation (state) is a value representing the agent current position as current_row * ncols + current_col (where both the row and col start at 0).
    
    print(f"\nEpisode {episode + 1} started")
    
    next_state = state
    
    for step in range(max_steps):
        

        if strategy == "exploration":
            action = explorationPolicyFunction(env)   # Random action
        elif strategy == "exploitation":
            action = exploitationPolicyFunction(next_state)
        else:
            action = epsilonGreedy(env,state,EPSILON)
        
        #Supply action to the env
        next_state, reward, terminated, truncated, _ = env.step(action)

        #Print info
        print(f"Episode {episode} - Step {step} Given Action {action} I got reward {reward} and next state {next_state}")

        # time.sleep(0.3)  # Slow down for better visualization

        # the goal position in the 4x4 map can be calculated as follows: 3 * 4 + 3 = 15.
        if reward and next_state==15:
            print("You Won!!!")
            print(f"Episode {episode + 1} ended with reward: {reward}")
            env.close()
            break
        
        #Check for Termination
        if terminated or truncated:
            print("GAME OVER --- Terminated!!!")
            print(f"Episode {episode + 1} ended with reward: {reward}")
            env.close()
            break
        

    env.close()


Episode 1 started
Episode 0 - Step 0 Given Action 2 I got reward 0.0 and next state 1
Episode 0 - Step 1 Given Action 0 I got reward 0.0 and next state 0
Episode 0 - Step 2 Given Action 1 I got reward 0.0 and next state 4
Episode 0 - Step 3 Given Action 0 I got reward 0.0 and next state 4
Episode 0 - Step 4 Given Action 2 I got reward 0.0 and next state 5
GAME OVER --- Terminated!!!
Episode 1 ended with reward: 0.0

Episode 2 started
Episode 1 - Step 0 Given Action 2 I got reward 0.0 and next state 1
Episode 1 - Step 1 Given Action 1 I got reward 0.0 and next state 5
GAME OVER --- Terminated!!!
Episode 2 ended with reward: 0.0

Episode 3 started
Episode 2 - Step 0 Given Action 0 I got reward 0.0 and next state 0
Episode 2 - Step 1 Given Action 2 I got reward 0.0 and next state 1
Episode 2 - Step 2 Given Action 2 I got reward 0.0 and next state 2
Episode 2 - Step 3 Given Action 2 I got reward 0.0 and next state 3
Episode 2 - Step 4 Given Action 2 I got reward 0.0 and next state 3
Episo