# 03 Vanilla Policy Gradient with rewards as weights

#### 👉🏽 The policy network will have the following architecture:

<img src="https://github.com/Paulescu/hands-on-rl/blob/main/04_lunar_lander/images/policy_network.svg?raw=True" width="300"/>

#### 👉🏽 And we will use the rewards to compute the weights in the policy gradient formula.

In [None]:
%load_ext autoreload
%autoreload 2
%pylab inline
%config InlineBackend.figure_format = 'svg'

## Environment 🚀🌙

In [None]:
# Environment identifier; passed to the agent, the agent-id helper, the logger
# and the model-path helper in the cells below.
ENV_NAME = "LunarLander-v3"

## Create agent and set hyperparameters

In [None]:
from src.vpg_agent import VPGAgent

# Hyperparameters gathered in one place so they are easy to tweak and inspect.
agent_config = {
    'learning_rate': 3e-3,
    'hidden_layers': [16, 16],
    # This notebook uses the 'rewards' option for the policy gradient weights.
    'gradient_weights': 'rewards',
}

# Build the vanilla policy gradient agent for the chosen environment.
agent = VPGAgent(env_name=ENV_NAME, **agent_config)

## Tensorboard logger to see training metrics

In [None]:
from src.utils import get_agent_id, get_logger, get_model_path

# Identifier for this agent/run, obtained from the project helper.
agent_id = get_agent_id(ENV_NAME)
print(f'agent_id = {agent_id}')

# Both helpers below take the same pair of keyword arguments.
run_info = {'env_name': ENV_NAME, 'agent_id': agent_id}

# Tensorboard logger used to inspect the training curves.
logger = get_logger(**run_info)

# Location where the policy network weights and hyperparameters are saved.
model_path = get_model_path(**run_info)

## Training 🏋️

In [None]:
# Training budget, named so the values are easy to find and change.
N_POLICY_UPDATES = 5000
BATCH_SIZE = 256  # NOTE(review): unit (steps vs. episodes) is defined by VPGAgent.train; confirm there

# Train the agent, passing the tensorboard logger and the path used to save
# the model.
agent.train(
    n_policy_updates=N_POLICY_UPDATES,
    batch_size=BATCH_SIZE,
    logger=logger,
    model_path=model_path,
)

## Evaluate the agent ⏱️

In [None]:
# Number of episodes requested for the evaluation run.
N_EVAL_EPISODES = 100

# `evaluate` returns two values, unpacked here as `rewards` and `success`;
# both are summarised in the cells that follow.
rewards, success = agent.evaluate(n_episodes=N_EVAL_EPISODES)

### Average reward

In [None]:
import numpy as np

# Convert once and reuse the array for both statistics.
rewards_arr = np.asarray(rewards)
reward_avg = rewards_arr.mean()
reward_std = rewards_arr.std()
print(f'Reward average {reward_avg:.2f}, std {reward_std:.2f}')

# Mean of the `success` values, printed as a percentage.
success_rate = np.asarray(success).mean()
print(f'Success rate = {success_rate:.2%}')

### Reward distribution

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Histogram of the rewards collected during evaluation.
fig, ax = plt.subplots(figsize=(10, 4))
ax.set_title("Rewards")
# Draw on the axes created above explicitly instead of relying on whichever
# axes matplotlib currently considers "active".
pd.Series(rewards).plot(kind='hist', bins=100, ax=ax)

plt.show()

## Let's see our agent in action 🎬

In [None]:
# Workaround for pygame error: "error: No available video device"
# See https://stackoverflow.com/questions/15933493/pygame-error-no-available-video-device?rq=1
# This is probably needed only for Linux.
import os

# The variable is set before `src.viz` is imported; keep this ordering.
os.environ.update({"SDL_VIDEODRIVER": "dummy"})

from src.viz import show_video

# NOTE(review): `env` is not defined in any cell visible in this notebook, so
# this call raises NameError unless `env` is created elsewhere — confirm where
# the environment instance is supposed to come from.
show_video(agent, env, sleep_sec=0.01, seed=12345)