# DQN Class

DQN(NIPS-2013)
"Playing Atari with Deep Reinforcement Learning"
https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf


DQN(Nature-2015)
"Human-level control through deep reinforcement learning"
http://web.stanford.edu/class/psych209/Readings/MnihEtAlHassibis15NatureControlDeepRL.pdf



ref: https://github.com/hunkim/ReinforcementZeroToAll/blob/master/dqn.py

# The DQN class can:

1) Build network\
2) Predict Q_value given state\
3) Train parameters

Args :

 session (tf.Session): Tensorflow session\
 input_size (int): Input dimension\
 output_size (int): Number of discrete actions\
 name (str, optional): TF Graph will be built under this name scope
 
# DQN Network architecture (simple MLP) :

Args:

 h_size (int, optional): Hidden layer dimension\
 l_rate (float, optional): Learning rate
 
acknowledgement:

Number of hidden layers = 1\
Hidden layer dimension (number of nodes) = 10\
Learning rate = 0.001\
Activation function = ReLU\
Loss function = MSE\
Optimizer = Adam




In [2]:
import numpy as np
import tensorflow as tf

In [12]:
class DQN:
    """Deep Q-Network: a one-hidden-layer MLP mapping states to Q-values.

    The graph is built with the TF1-style graph API, accessed through
    ``tf.compat.v1`` so that the same code works where the top-level
    ``tf.placeholder`` / ``tf.layers`` / ``tf.train`` names are gone
    (TensorFlow 2.x) as well as where ``tf.compat.v1`` is an alias for them.

    This class only builds and runs the graph; it does not run a variable
    initializer itself, so the caller is expected to initialise variables
    in ``session`` before calling ``predict`` or ``update``.

    Args:
        session: TensorFlow session used to run the graph.
        input_size: Dimension of a single state vector.
        output_size: Number of discrete actions (one Q-value per action).
        name: Variable scope under which the network is built.
        h_size: Number of units in the hidden layer.
        l_rate: Learning rate passed to the Adam optimizer.
    """

    # The session annotation is quoted so it is not evaluated when the class
    # is defined; TensorFlow 2.x has no ``tf.Session``, hence ``compat.v1``.
    def __init__(
        self,
        session: "tf.compat.v1.Session",
        input_size: int,
        output_size: int,
        name: str = "main",
        h_size: int = 10,
        l_rate: float = 0.001,
    ):
        self.session = session
        self.input_size = input_size
        self.output_size = output_size
        self.net_name = name

        self._build_network(h_size=h_size, l_rate=l_rate)

    def _build_network(self, h_size=10, l_rate=0.001):
        """Create placeholders, the MLP, the MSE loss and the Adam train op.

        Args:
            h_size: Number of units in the single hidden layer.
            l_rate: Learning rate for the Adam optimizer.
        """
        v1 = tf.compat.v1

        # Build inside the session's own graph: the tensors then belong to
        # the graph that `self.session.run` executes, and placeholders are
        # created in graph mode rather than eager mode.
        with self.session.graph.as_default(), v1.variable_scope(self.net_name):
            self._X = v1.placeholder(
                tf.float32, [None, self.input_size], name="input_x"
            )

            # input -> hidden (ReLU) -> one linear output per action
            hidden = v1.layers.dense(self._X, h_size, activation=tf.nn.relu)
            self._Qpred = v1.layers.dense(hidden, self.output_size)

            # Target Q-values, same shape as the prediction.
            self._Y = v1.placeholder(
                tf.float32, shape=[None, self.output_size]
            )

            self._loss = v1.losses.mean_squared_error(self._Y, self._Qpred)

            optimizer = v1.train.AdamOptimizer(learning_rate=l_rate)
            self._train = optimizer.minimize(self._loss)

    def predict(self, state: np.ndarray):
        """Return Q(s, a) for the given state(s).

        Args:
            state: State array; it is reshaped to (n, input_size) before
                being fed, so a single flat state is accepted as well.

        Returns:
            Whatever ``session.run`` yields for the Q-value tensor; with a
            real session, an array of shape (n, output_size).
        """
        x = np.reshape(state, [-1, self.input_size])
        return self.session.run(self._Qpred, feed_dict={self._X: x})

    def update(self, x_stack: np.ndarray, y_stack: np.ndarray):
        """Run one training step on a batch.

        Args:
            x_stack: Batch of states fed to the input placeholder.
            y_stack: Batch of target Q-values fed to the target placeholder.

        Returns:
            The result of running ``[loss, train_op]`` in the session, i.e.
            the loss followed by the train op's result.
        """
        feed = {
            self._X: x_stack,
            self._Y: y_stack,
        }
        return self.session.run([self._loss, self._train], feed_dict=feed)