In [85]:
import gym, theano, keras
import numpy as np
import random
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import *
from keras.optimizers import *
from keras import backend as K

In [63]:
# https://jaromiru.com/2016/10/03/lets-make-a-dqn-implementation/

# Environment
#     run()           # runs one episode
 
# Agent
#     act(s)          # decides what action to take in state s 
#     observe(sample) # adds sample (s, a, r, s_) to memory
#     replay()        # replays memories and improves
 
# Brain
#     predict(s)      # predicts the Q function values in state s
#     train(batch)    # performs supervised training step with batch
 
# Memory
#     add(sample)     # adds sample to memory
#     sample(n)       # returns random batch of n samples

In [64]:
# creating environment class 
game = 'LunarLander-v2'
env = gym.make('LunarLander-v2')
sed = 1
np.random.seed(sed)
random.seed(sed)
env.seed(sed)
action_size = env.action_space.n



In [65]:
LEARNING_RATE = 0.0001
GAMMA = 0.95

INITIAL_EPSILON = 0.5
FINAL_EPSILON = 0.001
EPSILON_DECAY = 0.99995
ALPHA = 0.1
TRAIN_BATCH_SIZE = 20
REPLAY_MEMORY_SIZE = 20000
UPDATE_TARGET_FREQ= 1000

In [80]:
class Agent:
    
    steps = 0
    epsilon = INITIAL_EPSILON
    
    def __init__(self, stateCnt, actionCnt):
        self.stateCnt = stateCnt
        self.actionCnt = actionCnt
        
        self.brain = Brain(stateCnt, actionCnt)
        self.memory = Memory(REPLAY_MEMORY_SIZE)
        
    def act(self, s):
        if np.random.rand() < self.epsilon:
            return random.randint(0, self.actionCnt-1)
        
        else:
            return np.argmax(self.brain.predictOne)
        
    def observe(self, sample):
        self.memory.add(sample)
        
        if self.steps % UPDATE_TARGET_FREQ == 0:
            self.brain.updateTargetModel()
            
        self.steps += 1
        self.epsilon = FINAL_EPSILON + (INITIAL_EPSILON - FINAL_EPSILON
                        ) * math.exp(-EPSILON_DECAY * self.steps)
        
    def replay(self):
        batch = self.memory.sample(TRAIN_BATCH_SIZE)
        batchlen = len(batch)
        
        no_state = numpy.zeros(self.stateCnt)
        states = numpy.array([x[0] for x in batch])
        states1 = numpy.array([(-1 if x[3] is None else x[3])
                               for o in batch])
        
        
        p = self.brain.predict(states)
        p1 = self.brain.predict(states1, target=True)
        
        x = numpy.zeros((batchlen, self.stateCnt))
        y = np.zeros((batchlen, self.actionCnt))
        
        for i in range(batchlen):
            x = batch[i]
            s = x[0]
            a = x[1]
            r = x[2]
            s1 = x[3]
            
            t = p[i]
            if s1 is None:
                t[a] = r
            else:
                t[a] = r + GAMMA * np.amax(p1[i])
                
            xx[i] = s
            yy[i] = t
            
        self.brain.train(x,y)

In [81]:
class Brain:
    # The Brain class encapsulates the neural network.
    def __init__(self, stateCnt, actionCnt):
        self.stateCnt = stateCnt
        self.actionCnt = actionCnt
        
        self.model = Sequential([
            Dense(512, input_shape=(stateCnt,)),
            Activation('relu'),
            Dense(512),
            Activation('relu'),
            Dense(actionCnt, activation='linear')
        ])
        opt = Adam(lr=LEARNING_RATE)
        self.model.compile(optimizer=opt,
                           loss='mse')


    def train(self, x, y, epochs=1, verbose=False):
        self.model.fit(x,y, batch_size=TRAIN_BATCH_SIZE, epochs=1, verbose=verbose)

    def predict(self, s, target=False):
        return self.model.predict(s)
        
    def predictOne(self, s, target=False):
        return self.predict(s.reshape(1, self.stateCnt), target=target).flatten()
    
    def updateTargetModel(self):
        self.model.set_weight(self.model.get_weights())

In [82]:
class Memory:

    def __init__(self, memory):
        self.memory = memory
        self.replay_memory = []
        
        
    def add(self, sample):
        self.replay_memory.append(sample)        

        if len(self.replay_memory) > self.memory:
            self.replay_memory.pop(0)

    def sample(self, n):
        n = min(n, len(self.replay_memory))
        return random.sample(self.replay_memory, n)

    def isFull(self):
        return len(self.replay_memory) >= self.memory

In [83]:
class Environment:
    
    def __init__(self, problem):
        self.problem = problem
        self.env = gym.make(problem)
        
    def run(self, agent):
        s =  self.env.reset()
        R = 0
        
        while True:
            a = agent.act(s)
            
            s1, r, done, inf = self.env.step(a)
            
            if done:
                s1 = None
                
            agent.observe((s,a,r,s1))
            agent.replay()
            
            s = s1
            R += r
            
            if done:
                break
                
        print ("Total reward: ", R)
                
                
            
            
            

In [86]:
env = Environment('CartPole-v0')
stateCnt = env.env.observation_space.shape[0]
actionCnt = env.env.action_space.n

agent = Agent(stateCnt, actionCnt)

while agent.steps < 10000:
    env.run(agent)

AttributeError: 'Sequential' object has no attribute 'set_weight'