# **LunarLander - Deep Q-Learning**

An implementation of Deep Q-Learning for the LunarLander-v3 environment.



## Initialization

In [1]:
%matplotlib inline
from IPython import display
import matplotlib.pyplot as plt

In [2]:
from sim.GymTrainer import GymTrainer
from framework.DeepQLearning import DeepQLearning

import torch
# Train on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    trainDevice = torch.device("cuda")
else:
    trainDevice = torch.device("cpu")

In [3]:
# Build the trainer around the 'LunarLander-v3' Gym environment.
# Construction prints the environment id and the seed it applies.
# NOTE(review): parameter meanings below are inferred from their names and the
# training log; confirm against sim/GymTrainer.py.
sim = GymTrainer(
    'LunarLander-v3',
    evalDevice="cpu",          # device name used for evaluation
    trainDevice=trainDevice,   # CUDA if available, else CPU (chosen above)
    render_mode='rgb_array',
    envNum=8,                  # presumably the number of environments created
    maxEpisode=5000,
    batchSize=32,              # training log advances ~32 episodes per batch
    maxStep=10000,
    stepLimitPenalty=0,
    seed=543,
)

Initializing Gym Environments of LunarLander-v3
init envs
set seeds 543


In [4]:
# Instantiate the Deep Q-Learning agent.
# NOTE(review): actionNum is queried from the simulator, but stateNum is
# hard-coded. 8 matches the LunarLander observation size documented by
# Gymnasium; keep it in sync if the environment id changes.
agent = DeepQLearning(
    actionNum=sim.actionSize(),
    stateNum=8,
    gamma=0.99,
    qNetLR=0.0005,
)

## Training

In [5]:
# makeSummaryWriter returns a (name, writer) pair: `name` is reused by the
# Save cell (agent.save) and `writer` by the Testing cell (sim.test).
name, writer = sim.makeSummaryWriter(agent)
# Run training. Progress is printed once per batch (Loss, AvgRew, FinRew).
# The recorded output of this cell ends in a KeyboardInterrupt, i.e. the run
# was stopped manually before training finished.
sim.train(agent, writer)

Batch:1 	 Episode:32 	 Loss: 9800.48 	 AvgRew: -168.55 	 FinRew: -100.00
Batch:2 	 Episode:64 	 Loss: 11630.63 	 AvgRew: -182.00 	 FinRew: -100.00
Batch:3 	 Episode:96 	 Loss: 13223.72 	 AvgRew: -202.06 	 FinRew: -100.00
Batch:4 	 Episode:128 	 Loss: 14183.26 	 AvgRew: -185.80 	 FinRew: -100.00
Batch:5 	 Episode:161 	 Loss: 9763.71 	 AvgRew: -167.08 	 FinRew: -100.00
Batch:6 	 Episode:193 	 Loss: 16101.62 	 AvgRew: -209.91 	 FinRew: -100.00
Batch:7 	 Episode:225 	 Loss: 8265.07 	 AvgRew: -166.87 	 FinRew: -100.00
Batch:8 	 Episode:257 	 Loss: 10697.32 	 AvgRew: -175.41 	 FinRew: -100.00
Batch:9 	 Episode:289 	 Loss: 8405.08 	 AvgRew: -169.18 	 FinRew: -100.00
Batch:10 	 Episode:321 	 Loss: 10744.01 	 AvgRew: -175.31 	 FinRew: -100.00
Batch:11 	 Episode:353 	 Loss: 11431.82 	 AvgRew: -192.31 	 FinRew: -100.00
Batch:12 	 Episode:385 	 Loss: 12550.15 	 AvgRew: -201.91 	 FinRew: -100.00
Batch:13 	 Episode:417 	 Loss: 9062.77 	 AvgRew: -164.87 	 FinRew: -100.00
Batch:14 	 Episode:449 	 Loss

KeyboardInterrupt: 

## Testing

In [None]:
# Evaluate the agent. `writer` is the object returned by
# sim.makeSummaryWriter in the Training cell, so that cell must run first.
# NOTE(review): episode / maxStep / renderStep semantics live in
# GymTrainer.test; confirm there before tuning.
sim.test(
    agent=agent,
    episode=12,
    maxStep=1000,
    renderStep=3,
    writer=writer,
)

## Save

In [None]:
# Save the agent under `name`, the identifier returned by
# sim.makeSummaryWriter in the Training cell (that cell must have run first).
agent.save(name)