# REINFORCE (Policy Gradient)

In [None]:
import yaml
import numpy as np
from env import create_env
from algorithms.reinforce import REINFORCEAgent
%reload_ext autoreload
%autoreload 2

# Seed passed to `reset(seed=...)` on both the training and the evaluation environment.
SEED = 42
# Path handed to `create_env` as `config_filepath`.
ENV_CONFIG = './configs/env.yaml'
# YAML file parsed into `config`; this notebook reads `learning_rate`, `gamma`,
# `num_episodes_train`, `num_episodes_eval` and `top_k` from it.
MODEL_CONFIG = './configs/reinforce.yaml'

In [None]:
# Training environment: built from the shared env config with rendering disabled.
env = create_env(config_filepath=ENV_CONFIG, render_mode=None)

# Seed the first reset; left as the final expression so the notebook still
# displays whatever `reset` returns.
env.reset(seed=SEED)

## Load Model Configs

In [None]:
# Read the agent/training settings from the YAML file at MODEL_CONFIG.
# The encoding is given explicitly so decoding does not depend on the
# platform's locale default.
with open(MODEL_CONFIG, 'r', encoding='utf-8') as file:
    # safe_load restricts parsing to standard YAML tags (plain Python objects).
    config = yaml.safe_load(file)

# Echo the parsed settings once the file has been closed, so the notebook
# output records which values this run used.
print(config)

## Create Agent

In [None]:
# Flattened observation size: the product of the observation-space dimensions.
# np.prod returns a NumPy scalar (a float for an empty shape), so convert it to
# a plain Python int before handing it to the agent.
state_size = int(np.prod(env.observation_space.shape))

# Number of actions, taken from the unwrapped environment's action type.
# NOTE(review): assumes `action_type` exposes a sized `actions` collection --
# confirm this holds for every env config used with this notebook.
action_size = len(env.unwrapped.action_type.actions)

print(f"State size: {state_size}, Action size: {action_size}")

# Learning rate and discount factor come from the model config loaded earlier.
agent = REINFORCEAgent(
    state_size=state_size,
    action_size=action_size,
    learning_rate=config['learning_rate'],
    gamma=config['gamma'],
)

## Train Agent or Load Weights

Run only one of the next two cells: either train the agent, or load existing weights.

In [None]:
# Train on the training environment for the number of episodes set in the model config.
train_episodes = config['num_episodes_train']
agent.train(env=env, num_episodes=train_episodes)

In [None]:
# Alternative to training: load weights into the agent.
# NOTE(review): called with no arguments, so the source location is whatever
# REINFORCEAgent.load_model defaults to -- confirm in algorithms/reinforce.
agent.load_model()

## Save Model Weights

In [None]:
# Persist the agent's current weights.
# NOTE(review): called with no arguments, so the destination is whatever
# REINFORCEAgent.save_model defaults to -- confirm in algorithms/reinforce.
agent.save_model()

## Evaluate Agent Performance

In [None]:
# Evaluation uses its own environment, created with the 'rgb_array' render mode
# and seeded with the same SEED as the training environment.
eval_env = create_env(config_filepath=ENV_CONFIG, render_mode='rgb_array')
eval_env.reset(seed=SEED)

# Episode count and top_k both come from the model config.
eval_episodes = config['num_episodes_eval']
eval_top_k = config['top_k']
agent.evaluate(env=eval_env, num_episodes=eval_episodes, top_k=eval_top_k)