# 01 Random agent baseline

#### 👉 Before you try to solve a Reinforcement Learning problem, you should get a grasp of its difficulty.

#### 👉 To do so, you need to design a dummy agent that can perform the task without much brains, and evaluate its performance.

#### 👉 A simple way to do so is by using a Random Agent, which chooses its next action randomly, without paying attention to the current state of the environment.

In [None]:
# Auto-reload imported modules before each cell runs, so edits to local
# source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# NOTE(review): %pylab injects numpy/matplotlib names into the global
# namespace; the cells below import what they use explicitly — confirm
# whether this magic is still needed.
%pylab inline
# Render inline figures as SVG.
%config InlineBackend.figure_format = 'svg'

## Environment 🌎

In [None]:
import gymnasium as gym
# Create the LunarLander-v3 environment used for the evaluation below.
# render_mode='rgb_array' requests frames as arrays instead of a window.
env = gym.make('LunarLander-v3', render_mode='rgb_array')

## Random agent

In [None]:
class RandomAgent:
    """Baseline agent that samples its actions from the environment's action space."""

    def __init__(self, env):
        # The environment is kept only to reach its action space in `act`.
        self.env = env

    def act(self, state) -> int:
        """
        Return an action sampled from the environment's action space.

        The `state` argument is accepted but never read: the agent does not
        consider the state of the environment when deciding what to do next.
        """
        action_space = self.env.action_space
        return action_space.sample()

# Instantiate the random baseline on the environment created earlier.
agent = RandomAgent(env)

## Evaluate performance of a Random agent ⏱️

In [None]:
from tqdm import tqdm

# Roll the agent out for `n_episodes` episodes, recording for each episode the
# total (undiscounted) reward and a 0/1 success flag.
n_episodes = 100
reward_per_episode = []
success_per_episode = []

for _episode in tqdm(range(n_episodes)):

    state, _ = env.reset()
    total_reward = 0
    terminated = truncated = False

    # An episode ends either because the environment reached a terminal state
    # (`terminated`) or because it was cut off from outside (`truncated`).
    while not (terminated or truncated):
        action = agent.act(state)
        state, reward, terminated, truncated, info = env.step(action)
        total_reward += reward

    reward_per_episode.append(total_reward)
    # Count a success only when the episode really terminated with a positive
    # final reward. Checking `reward > 0` alone would also count a truncated
    # episode whose last step reward merely happened to be positive.
    success_per_episode.append(1 if terminated and reward > 0 else 0)

In [None]:
import numpy as np
# Summarise the evaluation run: mean and standard deviation of the episode
# rewards, followed by the fraction of successful episodes.
rewards = np.array(reward_per_episode)  # convert once, reuse for both stats
reward_avg = rewards.mean()
reward_std = rewards.std()
print(f'Reward average {reward_avg:.2f}, std {reward_std:.2f}')

# The flags are 0/1, so their mean is the success rate.
success_rate = np.array(success_per_episode).mean()
print(f'Success rate = {success_rate:.2%}')

## Reward distribution

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Histogram of the total reward obtained in each evaluation episode.
fig, ax = plt.subplots(figsize=(10, 4))

episode_rewards = pd.Series(reward_per_episode)
# Draw on the axes created above rather than relying on the implicit
# "current axes".
episode_rewards.plot(kind='hist', bins=100, ax=ax)
ax.set_title("Rewards")

plt.show()

## Let's see our agent in action 🎬

In [None]:
# Workaround for pygame error: "error: No available video device"
# See https://stackoverflow.com/questions/15933493/pygame-error-no-available-video-device?rq=1
# This is probably needed only for Linux
import os
# Select SDL's "dummy" video driver so that no real display is required.
# NOTE(review): this is set before importing src.viz, presumably so that any
# pygame/SDL initialisation triggered by that import sees it — confirm.
os.environ["SDL_VIDEODRIVER"] = "dummy"
from src.viz import show_video

# Rebind `env` to a fresh LunarLander-v3 instance and hand it, together with
# the random agent, to show_video.
# NOTE(review): sleep_sec and seed are presumably the per-frame delay and the
# environment seed — verify against src.viz.show_video.
env = gym.make('LunarLander-v3', render_mode='rgb_array')
show_video(agent, env, sleep_sec=0.01, seed=12345)