# REINFORCE (Policy Gradient)

In [1]:
import yaml
import numpy as np
from env import create_env
from algorithms.reinforce import REINFORCEAgent
%reload_ext autoreload
%autoreload 2

SEED = 42
ENV_CONFIG = './configs/env.yaml'
MODEL_CONFIG = './configs/reinforce.yaml'
MODEL_PATH = './models/reinforce.pth'

In [2]:
env = create_env(
    config_filepath=ENV_CONFIG,
    render_mode=None,
)
env.reset(seed=SEED)

(array([[ 1.        ,  1.        ],
        [ 1.        ,  1.        ],
        [ 1.        ,  1.        ],
        [ 1.        ,  1.        ],
        [ 1.        ,  1.        ],
        [ 1.        ,  1.        ],
        [ 1.        ,  1.        ],
        [ 1.        ,  1.        ],
        [ 1.        ,  1.        ],
        [ 1.        ,  1.        ],
        [ 1.        ,  1.        ],
        [ 1.        ,  1.        ],
        [ 1.        ,  1.        ],
        [ 1.        ,  1.        ],
        [ 1.        ,  1.        ],
        [ 1.        ,  1.        ],
        [ 1.        ,  1.        ],
        [ 1.        ,  1.        ],
        [ 1.        ,  1.        ],
        [ 1.        ,  1.        ],
        [ 1.        ,  1.        ],
        [ 1.        ,  1.        ],
        [ 1.        ,  1.        ],
        [ 1.        ,  1.        ],
        [ 0.82090656, -0.20743484],
        [ 1.        ,  1.        ],
        [ 1.        ,  1.        ],
        [ 1.        ,  1.   

## Load Model Configs

In [3]:
with open(MODEL_CONFIG, 'r') as file:
    config = yaml.safe_load(file)
    print(config)

{'hidden_size': 64, 'learning_rate': 0.001, 'gamma': 0.8, 'num_episodes_train': 5000, 'num_episodes_eval': 10, 'top_k': 5}


## Create Agent

In [4]:
state_size = np.prod(env.observation_space.shape)
action_size = env.action_space.shape[0]
print(f"State size: {state_size}, Action size: {action_size}")
agent = REINFORCEAgent(
    state_size=state_size,
    hidden_size=config['hidden_size'],
    action_size=action_size,
    learning_rate=config['learning_rate'],
    gamma=config['gamma'],
)

State size: 72, Action size: 2


## Train Agent or Load Weights

In [5]:
# agent.load_model(
#     model_path = MODEL_PATH,
# )

In [None]:
agent.train(
    env=env,
    num_episodes=config['num_episodes_train'],
)

Training REINFORCE Agent:   0%|          | 19/5000 [00:11<53:13,  1.56it/s]

## Save Model Weights

In [None]:
agent.save_model(
    model_path=MODEL_PATH,
)

## Evaluate Agent Performance

In [None]:
eval_env = create_env(
    config_filepath=ENV_CONFIG,
    render_mode='rgb_array',
)
eval_env.reset(seed=SEED)
agent.evaluate(
    env=eval_env,
    num_episodes=config['num_episodes_eval'],
    top_k=config['top_k'],
)