In [None]:
import gymnasium as gym

# Discrete action ids used by this script for LunarLander.
ACTION_NOOP = 0        # Do nothing
ACTION_FIRE_LEFT = 1   # Fire left engine
ACTION_FIRE_MAIN = 2   # Fire main engine
ACTION_FIRE_RIGHT = 3  # Fire right engine

# Thresholds of the hand-written landing strategy.
X_TOLERANCE = 0.01        # allowed horizontal offset before correcting
ANGLE_TOLERANCE = 0.01    # allowed tilt before correcting
MIN_ALTITUDE = 0.8        # below this height the main engine is fired
MAX_DESCENT_SPEED = -0.1  # falling faster than this fires the main engine


def choose_action(observation):
    """Pick an action for the lander from the current observation.

    The observation is a sequence of 8 floats: x coordinate, y coordinate,
    x velocity, y velocity, lander angle, angular velocity, left leg
    contact, right leg contact. Only the first five entries are read, and
    the x velocity is currently not used by the strategy.

    Rules are checked from highest to lowest priority:

    1. Tilt: a tilt beyond ``ANGLE_TOLERANCE`` fires a side engine.
    2. Descent: falling faster than ``MAX_DESCENT_SPEED`` or being below
       ``MIN_ALTITUDE`` fires the main engine.
    3. Drift: a horizontal offset beyond ``X_TOLERANCE`` fires a side
       engine.
    4. Otherwise do nothing.

    Previously the altitude rule always overwrote the drift rule, so the
    horizontal correction never ran; it is now a genuine fallback.

    Args:
        observation: Indexable sequence with at least five numeric entries.

    Returns:
        One of ``ACTION_NOOP``, ``ACTION_FIRE_LEFT``, ``ACTION_FIRE_MAIN``
        or ``ACTION_FIRE_RIGHT``.
    """
    x_coord, y_coord, _x_velocity, y_velocity, lander_angle = observation[:5]

    # 1. Attitude correction has the final say.
    if lander_angle > ANGLE_TOLERANCE:
        return ACTION_FIRE_RIGHT
    if lander_angle < -ANGLE_TOLERANCE:
        return ACTION_FIRE_LEFT

    # 2. Slow the descent when falling too fast or already close to the ground.
    if y_velocity < MAX_DESCENT_SPEED or y_coord < MIN_ALTITUDE:
        return ACTION_FIRE_MAIN

    # 3. Otherwise steer back towards the centre line (x == 0).
    if x_coord < -X_TOLERANCE:
        return ACTION_FIRE_RIGHT
    if x_coord > X_TOLERANCE:
        return ACTION_FIRE_LEFT

    return ACTION_NOOP


# The guard keeps the run at module scope, so env, observation, total_reward,
# etc. remain globals when executed as a script or notebook cell, while
# importing this file no longer opens a render window.
if __name__ == "__main__":
    # Initialise the environment
    env = gym.make("LunarLander-v3", render_mode="human")
    try:
        # Get the first observation -> Initial State
        observation, info = env.reset(seed=3)
        print("Initial Observation:", observation)

        total_reward = 0
        terminated = truncated = False
        while not (terminated or truncated):
            # this is where you would insert your strategy
            action = choose_action(observation)
            # step (transition) through the environment with the action,
            # receiving the next observation, reward and whether the episode
            # has terminated or been truncated
            observation, reward, terminated, truncated, info = env.step(action)
            total_reward += reward

        print("Total Reward:", total_reward)
    finally:
        # Always release the render window, even if stepping raised.
        env.close()