# OpenAI Gym

## Agent Anatomy

In [1]:
import random
from typing import List


class Environment:
    """
    Dummy environment that returns random rewards.

    An episode lasts a fixed number of steps. Every applied action
    consumes one step and yields a random reward, regardless of which
    action was chosen.
    """

    def __init__(self, steps: int = 10) -> None:
        """
        Create a new environment.

        :param steps: Number of steps the episode lasts (defaults to 10).
        :raises ValueError: If ``steps`` is negative.
        """
        if steps < 0:
            raise ValueError("steps must be non-negative")
        self.steps_left = steps

    def get_observation(self) -> List[float]:
        """Get current observation from this environment"""
        # The dummy observation is always a fresh list of three zeros
        return [0.0] * 3

    def get_actions(self) -> List[int]:
        """Return currently available actions"""
        return [0, 1]

    def is_done(self) -> bool:
        """Returns true when the game is done"""
        # `<=` rather than `==` so that the episode still terminates
        # if `steps_left` is ever pushed below zero from the outside
        return self.steps_left <= 0

    def action(self, action: int) -> float:
        """
        Apply given action in this environment.
        Returns random reward.

        :param action: Action to apply (ignored by this dummy environment).
        :return: Reward drawn with ``random.random()``.
        :raises RuntimeError: If the game is already over.
        """

        # Panic if this method is called when the game is over
        if self.is_done():
            raise RuntimeError("Game is over")

        # Record that a step has been made and return random reward
        self.steps_left -= 1
        return random.random()


class Agent:
    """
    Dummy agent that follows a random policy and keeps a running
    sum of the rewards it has collected.
    """

    def __init__(self) -> None:
        # Sum of all rewards received so far
        self.total_reward = 0.0

    def step(self, env: Environment) -> None:
        """
        Perform a single policy step in the given environment:
        observe, pick a random action, apply it and accumulate the reward.
        """

        # Ask the environment for the current observation
        #  - Note: The random policy has no use for it, so it is discarded.
        env.get_observation()

        # Pick one of the currently available actions at random
        available_actions = env.get_actions()
        chosen = random.choice(available_actions)

        # Apply the selected action in the environment and add the reward to the total
        gained = env.action(chosen)
        self.total_reward = self.total_reward + gained


def run_episode(i: int) -> None:
    """
    Play one complete episode with a fresh environment and agent,
    then print the total reward the agent collected.

    :param i: Zero-based episode index (printed one-based).
    """
    # Every episode starts from a brand new agent and environment
    agent = Agent()
    env = Environment()

    # Keep stepping the agent until the environment reports the game is over
    while True:
        if env.is_done():
            break
        agent.step(env)

    # Report the outcome of this episode
    print(f"Episode: {i + 1}\tTotal reward: {agent.total_reward:.4f}")


# Run a few episodes
#  - Each call builds its own environment and agent and prints that episode's total reward.
for i in range(5):
    run_episode(i)

Episode: 1	Total reward: 5.2205
Episode: 2	Total reward: 3.7717
Episode: 3	Total reward: 4.3059
Episode: 4	Total reward: 4.5668
Episode: 5	Total reward: 3.6061
