In [1]:
import numpy as np
from gym.envs.registration import register
from gym.envs.toy_text.frozen_lake import generate_random_map
from matplotlib import pyplot as plt
from sklearn.preprocessing import KBinsDiscretizer
import time, math, random, gym, warnings
warnings.filterwarnings('ignore')
plt.style.use('fivethirtyeight')
from time import sleep
# For clearing the output to view the environment's progress 
# clearly in case of terminal-based rendering
from IPython.display import clear_output 

In [2]:
# Id of the gym environment used throughout the notebook.
game_name = "LunarLander-v2"
# Build the environment instance; the cells below pass `env` to the agent
# and step/render it directly.
env = gym.make(game_name)

In [3]:
class MyAgent:
    """Baseline agent that picks actions uniformly at random.

    The agent inspects the environment's action space once, at construction
    time, and afterwards samples actions without looking at the state.

    Attributes:
        is_env_discrete: True when the action space is a gym ``Discrete`` space.
        action_size: number of actions (only set for a discrete space).
        action_low, action_high, action_shape: bounds and shape copied from
            the action space (only set for a non-discrete space).
    """

    def __init__(self, env):
        """Record the action-space description of ``env``.

        Args:
            env: environment object exposing an ``action_space`` attribute.
        """
        # isinstance (rather than an exact type comparison) so that subclasses
        # of Discrete are also recognised as discrete spaces.
        self.is_env_discrete = isinstance(env.action_space, gym.spaces.Discrete)

        if self.is_env_discrete:
            # Discrete space: only the number of actions is needed.
            self.action_size = env.action_space.n
            print(f"The environment is Discrete and has Action Space {self.action_size}")
        else:
            # Every non-discrete space is treated as continuous and is assumed
            # to expose low / high / shape -- TODO confirm for spaces other
            # than Box.
            space = env.action_space
            self.action_low, self.action_high, self.action_shape = space.low, space.high, space.shape
            print(f"The environment is Continous and has Action Range {self.action_low} to {self.action_high} with shape {self.action_shape}")

    def get_action(self, state):
        """Return a random action.

        Args:
            state: current observation. Accepted so callers can pass it, but
                it is not used when choosing the action.

        Returns:
            An int in ``[0, action_size)`` for a discrete space; otherwise the
            result of ``np.random.uniform`` called with the stored low, high
            and shape.
        """
        if self.is_env_discrete:
            # Uniform pick over the action indices 0 .. action_size - 1.
            return random.randrange(self.action_size)
        # Continuous case: draw uniformly between the stored bounds.
        return np.random.uniform(self.action_low, self.action_high, self.action_shape)
            
    

In [4]:
# Roll out a purely random policy for a fixed number of frames, printing each
# transition and rendering the environment after every step.
N_STEPS = 600        # total number of frames to simulate
FRAME_DELAY = 0.05   # seconds to pause per frame so the rendering is watchable

agent = MyAgent(env)
state = env.reset()  # resetting the environment | initial state is returned

try:
    for _ in range(N_STEPS):
        RandomAction = agent.get_action(state)  # getting action
        # applying action and seeing the state in the game
        state, reward, done, info = env.step(RandomAction)
        print(f"State: {state}, Reward: {reward}, Done: {done}")
        sleep(FRAME_DELAY)
        env.render()
        if done:
            # Calling step() on a finished episode is undefined behaviour in
            # gym, so start a fresh episode and keep going until N_STEPS
            # frames have been shown.
            state = env.reset()

    sleep(2)  # keep the final frame visible briefly before the window closes
finally:
    # Always release the render window / environment resources, even when the
    # loop above is interrupted or raises.
    env.close()

The environment is Discrete and has Action Space 4
State: [-0.0060071   1.4330693  -0.2975417   0.47948405  0.00487993  0.02689175
  0.          0.        ], Reward: 1.4946587374148248, Done: False
State: [-0.00889788  1.4432515  -0.288633    0.45253906  0.00443663 -0.0088668
  0.          0.        ], Reward: 1.749768787148015, Done: False
State: [-0.01178865  1.4528334  -0.2886312   0.42586812  0.00399384 -0.00885653
  0.          0.        ], Reward: 1.31279629200327, Done: False
State: [-0.01467943  1.4618154  -0.28863007  0.39919892  0.00355074 -0.00886294
  0.          0.        ], Reward: 1.3285284149899326, Done: False
State: [-0.01757021  1.4701973  -0.28862888  0.37253046  0.00310772 -0.00886134
  0.          0.        ], Reward: 1.3382752608175679, Done: False
State: [-2.0390129e-02  1.4779695e+00 -2.7973992e-01  3.4543326e-01
  8.8326941e-04 -4.4493191e-02  0.0000000e+00  0.0000000e+00], Reward: 2.0877991997407164, Done: False
State: [-0.02314062  1.4851456  -0.27103433  0.