In [None]:
import gym
import numpy as np
import random
import tensorflow as tf
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam


EPISODES = 1000

class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = [] #deque(maxlen=2000)
        self.gamma = 0.95    # discount rate
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        # Neural Net for Deep-Q learning Model
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse',
                      optimizer=Adam(lr=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])  # returns action

    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = (reward + self.gamma *
                          np.amax(self.model.predict(next_state)[0]))
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        self.model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)


if __name__ == "__main__":
    env = gym.make('CartPole-v1')
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n
    agent = DQNAgent(state_size, action_size)
    # agent.load("./save/cartpole-dqn.h5")
    done = False
    batch_size = 32

    for e in range(EPISODES):
        state = env.reset()
        state = np.reshape(state, [1, state_size])
        for time in range(500):
            # env.render()
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            reward = reward if not done else -10
            next_state = np.reshape(next_state, [1, state_size])
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            if done:
                print("episode: {}/{}, score: {}, e: {:.2}"
                      .format(e, EPISODES, time, agent.epsilon))
                break
            if len(agent.memory) > batch_size:
                agent.replay(batch_size)

In [1]:
import gym
import numpy as np
import random
import tensorflow as tf
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

gamma = 0.95
alpha = 0.5
learning_rate_adam = 0.01
epsilon = 0.999
epsilon_decay = 0.99


class DQN:

    def __init__(self, observation_space, action_space):
        
        self.epsilon = epsilon
        self.alpha = alpha
        self.gamma = gamma
        self.action_space = action_space
        self.observation_space = observation_space
        
        self.memory = []
        self.batch_size = 32

        self.model = Sequential()
        self.model.add(Dense(24, input_shape=(self.observation_space,), activation="relu"))
        self.model.add(Dense(24, activation="relu"))
        self.model.add(Dense(self.action_space, activation="linear"))
        self.model.compile(loss="mse", optimizer=Adam(lr=learning_rate_adam))

    def choose_action(self, state):
        if np.random.rand() < self.epsilon:
            return random.randrange(self.action_space)
        q = self.model.predict(state)
        return np.argmax(q[0])

    def update(self):
        if len(self.memory) < self.batch_size: 
            return
        batch = random.sample(self.memory, self.batch_size) 
        for state, action, reward, next_state, done in batch: 
            update_value = reward
            if not done:
                 update_value = self.alpha * (reward + self.gamma * np.max(self.model.predict(next_state)[0]))
            #predict and update Q-values
            q = self.model.predict(state) 
            q[0][action] = update_value
            self.model.fit(state, q, verbose=0)
        self.epsilon *= epsilon_decay

    def memory_update(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

def cartpole():
    env = gym.make("CartPole-v1")
    observation_space, action_space = env.observation_space.shape[0], env.action_space.n
    epoch = 0
    dqn = DQN(observation_space, action_space)
    
    while True:
        score = 0
        epoch += 1
        state = env.reset()
        state = np.reshape(state, [1, observation_space])
        while True:
            score += 1
            action = dqn.choose_action(state)
            next_state, reward, done, info = env.step(action)
            reward = reward if not done else -reward
            next_state = np.reshape(next_state, [1, observation_space])
            dqn.memory_update(state, action, reward, next_state, done)
            state = next_state
            if done:
                print ("Epoch: " + str(epoch) + " Score: " + str(score))
                break
            dqn.update()


if __name__ == "__main__":
    cartpole()

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Epoch: 1 Score: 18
Instructions for updating:
Use tf.cast instead.
Epoch: 2 Score: 46
Epoch: 3 Score: 50
Epoch: 4 Score: 19
Epoch: 5 Score: 20
Epoch: 6 Score: 11
Epoch: 7 Score: 21
Epoch: 8 Score: 88
Epoch: 9 Score: 57
Epoch: 10 Score: 21
Epoch: 11 Score: 13
Epoch: 12 Score: 21
Epoch: 13 Score: 13
Epoch: 14 Score: 65
Epoch: 15 Score: 102
Epoch: 16 Score: 27
Epoch: 17 Score: 23
Epoch: 18 Score: 20
Epoch: 19 Score: 26
Epoch: 20 Score: 30
Epoch: 21 Score: 20
Epoch: 22 Score: 10
Epoch: 23 Score: 28
Epoch: 24 Score: 36
Epoch: 25 Score: 15
Epoch: 26 Score: 67
Epoch: 27 Score: 28
Epoch: 28 Score: 16
Epoch: 29 Score: 26
Epoch: 30 Score: 16
Epoch: 31 Score: 13
Epoch: 32 Score: 52
Epoch: 33 Score: 10
Epoch: 34 Score: 42
Epoch: 35 Score: 22
Epoch: 36 Score: 59
Epoch: 37 Score: 41
Epoch: 38 Score: 28
Epoch: 39 Score: 48
Epoch: 40 Score: 21
Epoch: 41 Score: 13
Epoch: 42 Score: 17


Epoch: 353 Score: 18
Epoch: 354 Score: 10
Epoch: 355 Score: 16
Epoch: 356 Score: 25
Epoch: 357 Score: 14
Epoch: 358 Score: 14
Epoch: 359 Score: 61
Epoch: 360 Score: 13
Epoch: 361 Score: 25
Epoch: 362 Score: 20
Epoch: 363 Score: 73
Epoch: 364 Score: 14
Epoch: 365 Score: 34
Epoch: 366 Score: 19
Epoch: 367 Score: 23
Epoch: 368 Score: 10
Epoch: 369 Score: 44
Epoch: 370 Score: 16
Epoch: 371 Score: 20
Epoch: 372 Score: 18
Epoch: 373 Score: 29
Epoch: 374 Score: 19
Epoch: 375 Score: 9
Epoch: 376 Score: 11
Epoch: 377 Score: 14
Epoch: 378 Score: 31
Epoch: 379 Score: 46
Epoch: 380 Score: 35
Epoch: 381 Score: 18
Epoch: 382 Score: 11
Epoch: 383 Score: 27
Epoch: 384 Score: 14
Epoch: 385 Score: 18
Epoch: 386 Score: 12
Epoch: 387 Score: 14
Epoch: 388 Score: 25
Epoch: 389 Score: 17
Epoch: 390 Score: 15
Epoch: 391 Score: 39
Epoch: 392 Score: 18
Epoch: 393 Score: 35
Epoch: 394 Score: 17
Epoch: 395 Score: 16
Epoch: 396 Score: 25
Epoch: 397 Score: 38
Epoch: 398 Score: 23
Epoch: 399 Score: 15
Epoch: 400 Sco

Epoch: 744 Score: 29
Epoch: 745 Score: 40
Epoch: 746 Score: 31
Epoch: 747 Score: 18
Epoch: 748 Score: 9
Epoch: 749 Score: 16
Epoch: 750 Score: 34
Epoch: 751 Score: 29
Epoch: 752 Score: 18
Epoch: 753 Score: 18
Epoch: 754 Score: 16
Epoch: 755 Score: 14
Epoch: 756 Score: 24
Epoch: 757 Score: 27
Epoch: 758 Score: 74
Epoch: 759 Score: 11
Epoch: 760 Score: 57
Epoch: 761 Score: 18
Epoch: 762 Score: 35
Epoch: 763 Score: 19
Epoch: 764 Score: 37
Epoch: 765 Score: 26
Epoch: 766 Score: 15
Epoch: 767 Score: 26
Epoch: 768 Score: 11
Epoch: 769 Score: 28
Epoch: 770 Score: 30
Epoch: 771 Score: 24
Epoch: 772 Score: 19
Epoch: 773 Score: 81
Epoch: 774 Score: 17
Epoch: 775 Score: 15
Epoch: 776 Score: 39
Epoch: 777 Score: 31
Epoch: 778 Score: 12
Epoch: 779 Score: 14
Epoch: 780 Score: 22
Epoch: 781 Score: 45
Epoch: 782 Score: 17
Epoch: 783 Score: 12
Epoch: 784 Score: 29
Epoch: 785 Score: 16
Epoch: 786 Score: 13
Epoch: 787 Score: 12
Epoch: 788 Score: 33
Epoch: 789 Score: 32
Epoch: 790 Score: 19
Epoch: 791 Sco

Epoch: 1129 Score: 76
Epoch: 1130 Score: 18
Epoch: 1131 Score: 26
Epoch: 1132 Score: 48
Epoch: 1133 Score: 15
Epoch: 1134 Score: 13
Epoch: 1135 Score: 29
Epoch: 1136 Score: 29
Epoch: 1137 Score: 17
Epoch: 1138 Score: 15
Epoch: 1139 Score: 36
Epoch: 1140 Score: 13
Epoch: 1141 Score: 16
Epoch: 1142 Score: 16
Epoch: 1143 Score: 25
Epoch: 1144 Score: 13
Epoch: 1145 Score: 16
Epoch: 1146 Score: 69
Epoch: 1147 Score: 26
Epoch: 1148 Score: 17
Epoch: 1149 Score: 12
Epoch: 1150 Score: 20
Epoch: 1151 Score: 46
Epoch: 1152 Score: 27
Epoch: 1153 Score: 9
Epoch: 1154 Score: 25
Epoch: 1155 Score: 26
Epoch: 1156 Score: 21
Epoch: 1157 Score: 28
Epoch: 1158 Score: 41
Epoch: 1159 Score: 17
Epoch: 1160 Score: 27
Epoch: 1161 Score: 22
Epoch: 1162 Score: 17
Epoch: 1163 Score: 51
Epoch: 1164 Score: 48
Epoch: 1165 Score: 16
Epoch: 1166 Score: 37
Epoch: 1167 Score: 101
Epoch: 1168 Score: 13
Epoch: 1169 Score: 14
Epoch: 1170 Score: 16
Epoch: 1171 Score: 20
Epoch: 1172 Score: 21
Epoch: 1173 Score: 17
Epoch: 117

Epoch: 1502 Score: 13
Epoch: 1503 Score: 32
Epoch: 1504 Score: 26
Epoch: 1505 Score: 18
Epoch: 1506 Score: 34
Epoch: 1507 Score: 14
Epoch: 1508 Score: 16
Epoch: 1509 Score: 32
Epoch: 1510 Score: 24
Epoch: 1511 Score: 23
Epoch: 1512 Score: 29
Epoch: 1513 Score: 12
Epoch: 1514 Score: 25
Epoch: 1515 Score: 91
Epoch: 1516 Score: 14
Epoch: 1517 Score: 9
Epoch: 1518 Score: 18
Epoch: 1519 Score: 32
Epoch: 1520 Score: 14
Epoch: 1521 Score: 15
Epoch: 1522 Score: 12
Epoch: 1523 Score: 13
Epoch: 1524 Score: 23
Epoch: 1525 Score: 26
Epoch: 1526 Score: 49
Epoch: 1527 Score: 20
Epoch: 1528 Score: 21
Epoch: 1529 Score: 20
Epoch: 1530 Score: 19
Epoch: 1531 Score: 17
Epoch: 1532 Score: 17
Epoch: 1533 Score: 25
Epoch: 1534 Score: 28
Epoch: 1535 Score: 40
Epoch: 1536 Score: 18
Epoch: 1537 Score: 35
Epoch: 1538 Score: 14
Epoch: 1539 Score: 36
Epoch: 1540 Score: 17
Epoch: 1541 Score: 16
Epoch: 1542 Score: 50
Epoch: 1543 Score: 29
Epoch: 1544 Score: 40
Epoch: 1545 Score: 21
Epoch: 1546 Score: 39
Epoch: 1547

Epoch: 1875 Score: 30
Epoch: 1876 Score: 23
Epoch: 1877 Score: 15
Epoch: 1878 Score: 12
Epoch: 1879 Score: 54
Epoch: 1880 Score: 19
Epoch: 1881 Score: 35
Epoch: 1882 Score: 11
Epoch: 1883 Score: 24
Epoch: 1884 Score: 23
Epoch: 1885 Score: 36
Epoch: 1886 Score: 13
Epoch: 1887 Score: 13
Epoch: 1888 Score: 26
Epoch: 1889 Score: 18
Epoch: 1890 Score: 36
Epoch: 1891 Score: 24
Epoch: 1892 Score: 22
Epoch: 1893 Score: 25
Epoch: 1894 Score: 21
Epoch: 1895 Score: 17
Epoch: 1896 Score: 19
Epoch: 1897 Score: 46
Epoch: 1898 Score: 11
Epoch: 1899 Score: 20
Epoch: 1900 Score: 21
Epoch: 1901 Score: 11
Epoch: 1902 Score: 33
Epoch: 1903 Score: 38
Epoch: 1904 Score: 29
Epoch: 1905 Score: 19
Epoch: 1906 Score: 8
Epoch: 1907 Score: 14
Epoch: 1908 Score: 21
Epoch: 1909 Score: 26
Epoch: 1910 Score: 17
Epoch: 1911 Score: 13
Epoch: 1912 Score: 17
Epoch: 1913 Score: 18
Epoch: 1914 Score: 33
Epoch: 1915 Score: 15
Epoch: 1916 Score: 22
Epoch: 1917 Score: 14
Epoch: 1918 Score: 39
Epoch: 1919 Score: 11
Epoch: 1920

Epoch: 2248 Score: 22
Epoch: 2249 Score: 18
Epoch: 2250 Score: 16
Epoch: 2251 Score: 14
Epoch: 2252 Score: 11
Epoch: 2253 Score: 48
Epoch: 2254 Score: 51
Epoch: 2255 Score: 27
Epoch: 2256 Score: 26
Epoch: 2257 Score: 25
Epoch: 2258 Score: 27
Epoch: 2259 Score: 23
Epoch: 2260 Score: 19
Epoch: 2261 Score: 34
Epoch: 2262 Score: 16
Epoch: 2263 Score: 15
Epoch: 2264 Score: 16
Epoch: 2265 Score: 22
Epoch: 2266 Score: 12
Epoch: 2267 Score: 32
Epoch: 2268 Score: 11
Epoch: 2269 Score: 21
Epoch: 2270 Score: 26
Epoch: 2271 Score: 23
Epoch: 2272 Score: 20
Epoch: 2273 Score: 12
Epoch: 2274 Score: 30
Epoch: 2275 Score: 30
Epoch: 2276 Score: 14
Epoch: 2277 Score: 58
Epoch: 2278 Score: 37
Epoch: 2279 Score: 11
Epoch: 2280 Score: 12
Epoch: 2281 Score: 21
Epoch: 2282 Score: 15
Epoch: 2283 Score: 21
Epoch: 2284 Score: 46
Epoch: 2285 Score: 13
Epoch: 2286 Score: 23
Epoch: 2287 Score: 21
Epoch: 2288 Score: 20
Epoch: 2289 Score: 42
Epoch: 2290 Score: 35
Epoch: 2291 Score: 39
Epoch: 2292 Score: 14
Epoch: 229

Epoch: 2621 Score: 22
Epoch: 2622 Score: 15
Epoch: 2623 Score: 29
Epoch: 2624 Score: 16
Epoch: 2625 Score: 17
Epoch: 2626 Score: 44
Epoch: 2627 Score: 19
Epoch: 2628 Score: 16
Epoch: 2629 Score: 9
Epoch: 2630 Score: 13
Epoch: 2631 Score: 17
Epoch: 2632 Score: 32
Epoch: 2633 Score: 14
Epoch: 2634 Score: 11
Epoch: 2635 Score: 38
Epoch: 2636 Score: 18
Epoch: 2637 Score: 41
Epoch: 2638 Score: 21
Epoch: 2639 Score: 27
Epoch: 2640 Score: 18
Epoch: 2641 Score: 12
Epoch: 2642 Score: 17
Epoch: 2643 Score: 44
Epoch: 2644 Score: 11
Epoch: 2645 Score: 12
Epoch: 2646 Score: 12
Epoch: 2647 Score: 39
Epoch: 2648 Score: 17
Epoch: 2649 Score: 68
Epoch: 2650 Score: 32
Epoch: 2651 Score: 13
Epoch: 2652 Score: 14
Epoch: 2653 Score: 21
Epoch: 2654 Score: 33
Epoch: 2655 Score: 15
Epoch: 2656 Score: 26
Epoch: 2657 Score: 22
Epoch: 2658 Score: 15
Epoch: 2659 Score: 8
Epoch: 2660 Score: 38
Epoch: 2661 Score: 41
Epoch: 2662 Score: 47
Epoch: 2663 Score: 11
Epoch: 2664 Score: 17
Epoch: 2665 Score: 57
Epoch: 2666 

Epoch: 2994 Score: 16
Epoch: 2995 Score: 12
Epoch: 2996 Score: 11
Epoch: 2997 Score: 36
Epoch: 2998 Score: 29
Epoch: 2999 Score: 25
Epoch: 3000 Score: 27
Epoch: 3001 Score: 14
Epoch: 3002 Score: 13
Epoch: 3003 Score: 26
Epoch: 3004 Score: 13
Epoch: 3005 Score: 20
Epoch: 3006 Score: 42
Epoch: 3007 Score: 36
Epoch: 3008 Score: 21
Epoch: 3009 Score: 30
Epoch: 3010 Score: 15
Epoch: 3011 Score: 16
Epoch: 3012 Score: 23
Epoch: 3013 Score: 33
Epoch: 3014 Score: 12
Epoch: 3015 Score: 27
Epoch: 3016 Score: 16
Epoch: 3017 Score: 14
Epoch: 3018 Score: 12
Epoch: 3019 Score: 23
Epoch: 3020 Score: 19
Epoch: 3021 Score: 17
Epoch: 3022 Score: 33
Epoch: 3023 Score: 16
Epoch: 3024 Score: 44
Epoch: 3025 Score: 22
Epoch: 3026 Score: 79
Epoch: 3027 Score: 41
Epoch: 3028 Score: 30
Epoch: 3029 Score: 15
Epoch: 3030 Score: 20
Epoch: 3031 Score: 23
Epoch: 3032 Score: 18
Epoch: 3033 Score: 59
Epoch: 3034 Score: 27
Epoch: 3035 Score: 12
Epoch: 3036 Score: 17
Epoch: 3037 Score: 14
Epoch: 3038 Score: 31
Epoch: 303

Epoch: 3367 Score: 16
Epoch: 3368 Score: 14
Epoch: 3369 Score: 16
Epoch: 3370 Score: 10
Epoch: 3371 Score: 13
Epoch: 3372 Score: 35
Epoch: 3373 Score: 15
Epoch: 3374 Score: 17
Epoch: 3375 Score: 11
Epoch: 3376 Score: 15
Epoch: 3377 Score: 26
Epoch: 3378 Score: 34
Epoch: 3379 Score: 14
Epoch: 3380 Score: 15
Epoch: 3381 Score: 15
Epoch: 3382 Score: 16
Epoch: 3383 Score: 13
Epoch: 3384 Score: 32
Epoch: 3385 Score: 40
Epoch: 3386 Score: 25
Epoch: 3387 Score: 25
Epoch: 3388 Score: 70
Epoch: 3389 Score: 15
Epoch: 3390 Score: 14
Epoch: 3391 Score: 15
Epoch: 3392 Score: 10
Epoch: 3393 Score: 24
Epoch: 3394 Score: 123
Epoch: 3395 Score: 30
Epoch: 3396 Score: 13
Epoch: 3397 Score: 43
Epoch: 3398 Score: 15
Epoch: 3399 Score: 23
Epoch: 3400 Score: 16
Epoch: 3401 Score: 18
Epoch: 3402 Score: 19
Epoch: 3403 Score: 36
Epoch: 3404 Score: 11
Epoch: 3405 Score: 8
Epoch: 3406 Score: 18
Epoch: 3407 Score: 23
Epoch: 3408 Score: 32
Epoch: 3409 Score: 29
Epoch: 3410 Score: 18
Epoch: 3411 Score: 18
Epoch: 341

KeyboardInterrupt: 