# **LunarLander - Policy Gradient**

An implementation of Policy Gradient for the LunarLander-v3 environment.



## Initialization

In [None]:
%matplotlib inline
from IPython import display
import matplotlib.pyplot as plt

In [None]:
from sim.GymTrainer import GymTrainer
from framework.PolicyGradient import PolicyGradientAgent

import torch
# Train on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    trainDevice = torch.device("cuda")
else:
    trainDevice = torch.device("cpu")

In [None]:
# Build the training harness around the 'LunarLander-v3' environment.
# Evaluation is pinned to the CPU, while training uses whichever device
# `trainDevice` resolved to.
# NOTE(review): the meaning of the numeric settings below (envNum, batchSize,
# stepLimitPenalty, ...) is defined by GymTrainer — confirm against its docs.
sim = GymTrainer(
    "LunarLander-v3",
    evalDevice="cpu",
    trainDevice=trainDevice,
    render_mode="rgb_array",
    envNum=4,
    maxEpisode=1500,
    batchSize=4,
    maxStep=1000,
    stepLimitPenalty=150,
    seed=543,
)

In [None]:
# Instantiate the policy-gradient agent.
# The action count is queried from the simulator, whereas the state size is
# passed as the literal 8.
# NOTE(review): presumably 8 is the LunarLander observation size — confirm, and
# consider reading it from the environment instead of hard-coding it.
agent = PolicyGradientAgent(
    actionNum=sim.actionSize(),
    stateNum=8,
    gamma=0.99,
    policyLR=0.006,
)

## Training

In [None]:
# Ask the simulator for a summary writer for this agent. The call also returns
# a name, which the Save section later passes to agent.save.
name, writer = sim.makeSummaryWriter(agent)
# Train the agent on the simulator, handing it the writer
# (presumably used for metric logging — confirm against GymTrainer.train).
sim.train(agent, writer)

## Testing

In [None]:
# Evaluate the agent on the simulator, reusing the writer created in the
# Training section.
# NOTE(review): `episode`, `maxStep` and `renderStep` semantics are defined by
# GymTrainer.test — presumably 12 test episodes capped at 1000 steps each; confirm.
sim.test(
    agent=agent,
    episode=12,
    maxStep=1000,
    renderStep=3,
    writer=writer,
)

## Save

In [None]:
# Save the agent under the name returned by sim.makeSummaryWriter in the
# Training section.
# NOTE(review): what is written, and where, is defined by
# PolicyGradientAgent.save — confirm before relying on the output path.
agent.save(name)