# quantum version of RL
[QRL 논문](https://arxiv.org/pdf/2108.06849)에서 소개한 방법으로, DQN에서 agent 쪽을 양자 버전으로 바꾸는 것을 해보자.

In [4]:
import gymnasium as gym
import math
import random
import matplotlib.pyplot as plt
from collections import namedtuple, deque
from itertools import count

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import pennylane as qml

In [None]:
# Hyperparameters
# NOTE(review): the per-constant roles marked "presumably" are inferred from
# the names only; confirm against the training loop.
N_QUBITS = 4        # number of wires requested for the quantum device
BATCH_SIZE = 32     # presumably the number of transitions per replay sample
EPS_START = 0.9     # presumably the initial exploration rate
EPS_END = 0.05      # presumably the final exploration rate
EPS_DECAY = 1_000   # presumably the decay scale of the exploration rate
TAU = 5e-3          # presumably the target-network soft-update rate
LR = 0.0001         # presumably the optimizer learning rate

In [None]:
# Environment: CartPole-v1 (input -> 0: left, 1: right; output -> R^4 vector)
env = gym.make("CartPole-v1")

# Classical side: report CUDA availability, then pick the matching torch device
print("CUDA Available:", torch.cuda.is_available())
classical_device = (
    torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
)

# Quantum side: PennyLane "default.qubit" device with N_QUBITS wires
print("Qubit number:", N_QUBITS)
quantum_device = qml.device("default.qubit", wires=N_QUBITS)

CUDA Available: True
Qubit number: 4


In [8]:
# Record type for one stored transition; fields are accessed by name.
Transition = namedtuple(
    "Transition",
    ["state", "action", "next_state", "reward"],
)

class ReplayMemory:
    """Bounded buffer of transitions with uniform random sampling."""

    def __init__(self, capacity):
        """Create an empty buffer that keeps at most ``capacity`` entries.

        The buffer is a ``deque`` with ``maxlen=capacity``, so appending to a
        full buffer discards the oldest entry.
        """
        self.memory = deque(maxlen=capacity)

    def __len__(self):
        """Return the number of transitions currently stored."""
        return len(self.memory)

    def push(self, *args):
        """Build a ``Transition`` from ``args`` and append it to the buffer."""
        record = Transition(*args)
        self.memory.append(record)

    def sample(self, batch_size):
        """Return ``batch_size`` distinct stored entries chosen at random.

        Delegates to ``random.sample``, which raises ``ValueError`` when
        ``batch_size`` exceeds the number of stored entries.
        """
        return random.sample(self.memory, batch_size)

In [None]:
# Classical Critic Network
class ClassicalCritic(nn.Module):
    """One-hidden-layer MLP that maps a state vector to a single scalar.

    Architecture: ``Linear(state_dim, hidden_dim) -> ReLU -> Linear(hidden_dim, 1)``.

    Args:
        state_dim: Size of the last dimension of the input. Defaults to 4,
            matching the R^4 observation noted for CartPole-v1.
        hidden_dim: Width of the hidden layer. Defaults to 128.
    """

    def __init__(self, state_dim=4, hidden_dim=128):
        super().__init__()
        # Layers are created in the same order as before (fc1, then fc_v) so
        # default-constructed models consume the RNG identically.
        self.fc1 = nn.Linear(state_dim, hidden_dim)
        self.fc_v = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Return the scalar head output for ``x``.

        Args:
            x: Tensor whose last dimension has size ``state_dim``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 1.
        """
        hidden = F.relu(self.fc1(x))
        return self.fc_v(hidden)
