# Gym library
### In this exercise we will launch some games from the Gym library, try to solve them manually, and describe their actions, states and environments.

In [14]:
import gym
import numpy as np

# Classic Control
1. MountainCar
- environment has full visualization of the map
- there is a single agent, and it is fully responsible for what happens
- environment is static and discrete; only the car moves
- this version is the one with discrete actions

In [15]:
# Drive MountainCar with a hand-written, open-loop action schedule.
# Every cycle applies action 2, then 0, then 1; the bursts grow with the cycle
# index (2*cycle, 5*cycle and 2*cycle steps), so the car swings further each time.
env = gym.make('MountainCar-v0', render_mode="human")
try:
    observation, info = env.reset(seed=42)

    # (action id, steps per unit of cycle index).
    # NOTE(review): per the Gym MountainCar-v0 docs the ids are
    # 0 = push left, 1 = no push, 2 = push right -- confirm for your version.
    schedule = ((2, 2), (0, 5), (1, 2))

    for cycle in range(10):
        for action, steps_per_cycle in schedule:
            for _ in range(steps_per_cycle * cycle):
                observation, reward, terminated, truncated, info = env.step(action)
                # Start a fresh episode as soon as the current one ends.
                if terminated or truncated:
                    observation, info = env.reset()
finally:
    # Release the render window even if a step raises or the cell is interrupted.
    env.close()

# Box2D
1. Bipedal Walker
- environment has full visualization of the map
- one agent who makes every move
- environment is static and continuous
- set of actions is continuous

In [16]:
# Run BipedalWalker for 300 steps with randomly sampled actions.
env = gym.make("BipedalWalker-v3", render_mode="human")
try:
    observation, info = env.reset(seed=278779)
    # reset(seed=...) seeds the environment only; the action space keeps its own
    # random generator, so seed it too to make the sampled actions repeatable.
    env.action_space.seed(278779)

    for _ in range(300):
        action = env.action_space.sample()
        observation, reward, terminated, truncated, info = env.step(action)

        # Start a fresh episode as soon as the current one ends.
        if terminated or truncated:
            observation, info = env.reset()
finally:
    # Release the render window even if a step raises or the cell is interrupted.
    env.close()

2. Lunar Lander
- environment has full visualization of the map
- agent has full responsibility for the moves 
- environment is static; its state (the lander's position, velocity and angle) is continuous
- set of actions is discrete by default (it becomes continuous after passing "continuous=True" when creating the environment)

In [17]:
# Run LunarLander for 300 steps while always sending the same discrete action.
env = gym.make("LunarLander-v2", render_mode="human")
try:
    observation, info = env.reset(seed=42)

    # The action never changes, so it is set once outside the loop.
    # NOTE(review): per the Gym LunarLander docs action 0 is "do nothing" -- confirm.
    action = 0

    for step in range(300):
        # Debug aid: print("Action no.", step, ":", action)
        observation, reward, terminated, truncated, info = env.step(action)

        # Start a fresh episode as soon as the current one ends.
        if terminated or truncated:
            observation, info = env.reset()
finally:
    # Release the render window even if a step raises or the cell is interrupted.
    env.close()


# Toy Text
1. Taxi
- environment is fully observable: the whole map is visible
- there is one agent who has full control
- environment is dynamic and continuous
- set of actions is discrete

In [18]:
# Run Taxi for 30 steps with randomly sampled actions.
env = gym.make("Taxi-v3", render_mode="human")
try:
    observation, info = env.reset(seed=42)
    # reset(seed=...) seeds the environment only; the action space keeps its own
    # random generator, so seed it too to make the sampled actions repeatable.
    env.action_space.seed(42)

    for _ in range(30):
        action = env.action_space.sample()
        observation, reward, terminated, truncated, info = env.step(action)

        # Start a fresh episode as soon as the current one ends.
        if terminated or truncated:
            observation, info = env.reset()
finally:
    # Release the render window even if a step raises or the cell is interrupted.
    env.close()

2. Frozen Lake
- environment has full visualization of the map
- there is a single agent; if `is_slippery` is False it has full control of the character in the game. Otherwise, there is a chance that the character will "slip" onto the block to the right or left of the intended direction
- environment is static and discrete
- set of actions is discrete

In [19]:
# Walk a fixed route on the non-slippery 8x8 FrozenLake map:
# seven steps with action 2 followed by seven steps with action 1.
env = gym.make('FrozenLake8x8-v1', render_mode="human", is_slippery=False)
try:
    observation, info = env.reset(seed=42)

    # (action id, number of consecutive steps).
    # NOTE(review): per the Gym FrozenLake docs 2 = right and 1 = down -- confirm.
    route = ((2, 7), (1, 7))

    for action, repeats in route:
        for _ in range(repeats):
            observation, reward, terminated, truncated, info = env.step(action)
            # Start a fresh episode as soon as the current one ends.
            if terminated or truncated:
                observation, info = env.reset()
finally:
    # Release the render window even if a step raises or the cell is interrupted.
    env.close()

# Atari
1. Adventure
- environment has visualization of parts of the map
- there is one agent who plays as the character, but there is a chance that a random effect overrides the move it chose
- environment is dynamic and continuous
- set of actions is discrete

In [20]:
# Play a short scripted sequence of moves in Atari Adventure.
env = gym.make("ALE/Adventure-v5", render_mode="human")
try:
    observation, info = env.reset(seed=42)

    # (action id, number of consecutive steps), executed in order.
    # NOTE(review): the ids index the ALE action set; look up their meaning with
    # env.unwrapped.get_action_meanings() before changing the script.
    script = ((5, 10), (4, 30), (2, 10), (4, 15))

    for action, repeats in script:
        for _ in range(repeats):
            observation, reward, terminated, truncated, info = env.step(action)
            # Start a fresh episode as soon as the current one ends.
            if terminated or truncated:
                observation, info = env.reset()

    # Alternative (left disabled, as in the original notebook): drive the game for
    # 500 steps with env.action_space.sample() instead of the script above.
finally:
    # Release the render window even if a step raises or the cell is interrupted.
    env.close()

# MuJoCo
1. Inverted Pendulum
- environment has a full visualization in 3D of the map
- there is one agent who has full control
- environment is continuous as well as the set of actions

In [21]:
# Run the MuJoCo InvertedPendulum for 300 steps with randomly sampled actions.
env = gym.make('InvertedPendulum-v4', render_mode="human")
try:
    observation, info = env.reset(seed=42)
    # reset(seed=...) seeds the environment only; the action space keeps its own
    # random generator, so seed it too to make the sampled actions repeatable.
    env.action_space.seed(42)

    for _ in range(300):
        action = env.action_space.sample()
        observation, reward, terminated, truncated, info = env.step(action)

        # Start a fresh episode as soon as the current one ends.
        if terminated or truncated:
            observation, info = env.reset()
finally:
    # Release the render window even if a step raises or the cell is interrupted.
    env.close()