In [19]:
import gym
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout

def gather_data(env):
    num_trials = 10000
    min_score = 50
    sim_steps = 500
    trainingX, trainingY = [], []
    
    scores = []
    for _ in range(num_trials):
        observation = env.reset()
        score = 0
        training_sampleX, training_sampleY = [], []
        for step in range(sim_steps):
            action = np.random.randint(0, 2)
            one_hot_action = np.zeros(2)
            one_hot_action[action] = 1
            training_sampleX.append(observation)
            training_sampleY.append(one_hot_action)
            
            observation, reward, done, _ = env.step(action)
            score += reward
            if done:
                break;
        if score > min_score:
            scores.append(score)
            trainingX += training_sampleX
            trainingY += training_sampleY
            
    trainingX, trainingY = np.array(trainingX), np.array(trainingY)
    print("Average: {}".format(np.mean(scores)))
    print("Median: {}".format(np.median(scores)))
    print(trainingY)
    return trainingX, trainingY

def create_model():
    model = Sequential()
    model.add(Dense(128, input_shape=(4,), activation="relu"))
    model.add(Dropout(0.06))
    
    model.add(Dense(256, activation="relu"))
    model.add(Dropout(0.06))
    
    model.add(Dense(512, activation="relu"))
    model.add(Dropout(0.06))
    
    model.add(Dense(256, activation="relu"))
    model.add(Dropout(0.06))
    
    model.add(Dense(128, activation="relu"))
    model.add(Dropout(0.06))
    model.add(Dense(2, activation="softmax"))
    
    model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
    return model

def predict():
    env = gym.make("CartPole-v0")
    trainingX, trainingY = gather_data(env)
    model = create_model()
    model.fit(trainingX, trainingY, epochs=5)
    
    scores = []
    num_trials = 50
    sim_steps = 500
    for _ in range(num_trials):
        observation = env.reset()
        score = 0
        for step in range(sim_steps):
            action = np.argmax(model.predict(observation.reshape(1,4)))
            observation, reward, done, _ = env.step(action)
            score += reward
            if done:
                break
        scores.append(score)
    print(np.mean(scores))
    
predict()

Average: 63.30383480825959
Median: 59.0
[[1. 0.]
 [1. 0.]
 [0. 1.]
 ...
 [0. 1.]
 [0. 1.]
 [0. 1.]]
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
148.48


In [27]:
import gym
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout

def create_model():
    model = Sequential()
    model.add(Dense(128, input_shape=(4,), activation="relu"))
    model.add(Dropout(0.06))
    
    model.add(Dense(256, activation="relu"))
    model.add(Dropout(0.06))
    
    model.add(Dense(512, activation="relu"))
    model.add(Dropout(0.06))
    
    model.add(Dense(256, activation="relu"))
    model.add(Dropout(0.06))
    
    model.add(Dense(128, activation="relu"))
    model.add(Dropout(0.06))
    model.add(Dense(2, activation="softmax"))
    
    model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
    return model

env = gym.make('CartPole-v0')
model = create_model()

for _ in range(100):#number of trials
    batchX = []
    batchY = []
    observation = env.reset()
    for i in range(100):#batch size
        batchX.append(observation)
        out = model.predict(observation.reshape(1, 4))
        action = np.argmax(out)
        observation, reward, done, _ = env.step(action)
        out[0][action] = reward
        batchY.append(out[0])
        if(done):
            print(i)
            break
    batchX = np.array(batchX)
    batchY = np.array(batchY)
    model.fit(batchX, batchY, epochs=1)

done = False;

for _ in range(10):#number of test trials
    observation = env.reset()
    score = 0
    while not done:
        action = np.argmax(model.predict(observation.reshape(1, 4)))
        observation, reward, done, _ = env.step(action)
        score += reward
    print(score)

7
Epoch 1/1
8
Epoch 1/1
9
Epoch 1/1
7
Epoch 1/1
9
Epoch 1/1
7
Epoch 1/1
7
Epoch 1/1
8
Epoch 1/1
9
Epoch 1/1
9
Epoch 1/1
7
Epoch 1/1
8
Epoch 1/1
9
Epoch 1/1
9
Epoch 1/1
8
Epoch 1/1
9
Epoch 1/1
9
Epoch 1/1
7
Epoch 1/1
8
Epoch 1/1
7
Epoch 1/1
7
Epoch 1/1
9
Epoch 1/1
8
Epoch 1/1
8
Epoch 1/1
10
Epoch 1/1
7
Epoch 1/1
8
Epoch 1/1
8
Epoch 1/1
9
Epoch 1/1
8
Epoch 1/1
9
Epoch 1/1
9
Epoch 1/1
9
Epoch 1/1
7
Epoch 1/1
9
Epoch 1/1
8
Epoch 1/1
9
Epoch 1/1
8
Epoch 1/1
7
Epoch 1/1
9
Epoch 1/1
9
Epoch 1/1
10
Epoch 1/1
9
Epoch 1/1
8
Epoch 1/1
7
Epoch 1/1
9
Epoch 1/1
9
Epoch 1/1
7
Epoch 1/1
7
Epoch 1/1
8
Epoch 1/1
8
Epoch 1/1
9
Epoch 1/1
8
Epoch 1/1
9
Epoch 1/1
10
Epoch 1/1
8
Epoch 1/1
9
Epoch 1/1
8
Epoch 1/1
8
Epoch 1/1
8
Epoch 1/1
7
Epoch 1/1
8
Epoch 1/1
8
Epoch 1/1
9
Epoch 1/1
7
Epoch 1/1
8
Epoch 1/1
7
Epoch 1/1
8
Epoch 1/1
8
Epoch 1/1
9
Epoch 1/1
8
Epoch 1/1
9
Epoch 1/1
8
Epoch 1/1
8
Epoch 1/1
9
Epoch 1/1
7
Epoch 1/1
9
Epoch 1/1
7
Epoch 1/1
8
Epoch 1/1
8
Epoch 1/1


9
Epoch 1/1
9
Epoch 1/1
7
Epoch 1/1
9
Epoch 1/1
9
Epoch 1/1
9
Epoch 1/1
8
Epoch 1/1
8
Epoch 1/1
9
Epoch 1/1
9
Epoch 1/1
9
Epoch 1/1
8
Epoch 1/1
8
Epoch 1/1
10
Epoch 1/1
9
Epoch 1/1
7
Epoch 1/1
8
Epoch 1/1
7
Epoch 1/1
8
Epoch 1/1
9
Epoch 1/1
10.0
0
0
0
0
0
0
0
0
0


In [3]:
import random
import gym
import numpy as mp
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam

from collections import deque

class DQN:
    def __init__(self, env):
        self.env = env
        self.memory = deque(maxlen=2000)
        
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.01
        self.tau = 0.05
        
        self.model = self.create_model()
        self.target_model = self.create_model()
    def create_model():
        model = Sequential()
        state_shape = self.env.observation_space.shape
        model.add(Dense(24, input_dim=state_shape[0], activation = "relu"))
        model.add(Dense(48, activation="relu"))
        model.add(Dense(24, activation="relu"))
        model.add(Dense(self.env.action_space.n))
        model.compile(loss="mean_squared_error", optimizer=Adam(lr=self.learning_rate))
        return model
    def remember(self, state, action, reward, new_state, done):
        self.memory.append([state, action, reward, new_state, done])
    def replay(self):
        batch_size = 32
        if len(self.memory) < batch_size:
            return
        
        samples = random.sample(self.memory, batch_size)
        for sample in samples:
            state, action, reward, new_state, done = sample
            target = self.target_model.predict(state)
            if done:
                target[0][action] = reward
            else:
                Q_future = max(self.target_model.predict(new_state)[0])
                target[0][action] = reward + Q_future * self.gamma
            self.model.fit(state, target, epochs=1, verbose=0)
    def target_train(self):
        weights = self.model.get_weights()
        target_weights = self.target_model.get_weights()
        for i in range(len(target_weights)):
            target_weights[i] = weights[i]
        self.target_model.set_weights(target_weights)
    def act(self, state):
        self.epsilon *= self.epsilon_decay
        self.epsilon = max(self.epsilon_min, self.epsilon)
        if np.random.random() < self.epsilon:
            return self.env.action_space.sample()
        return np.argmax(self.model.predict(state)[0])
    def save_model(self, fn):
        self.model.save(fn)

def main():
    env = gym.make("MountainCar-v0")
    gamma = 0.9
    epsilon = 0.95
    
    trials = 100
    trials_len = 500
    
    updateTargetNetwork = 1000
    dqn_agent = DQN(env = env)
    steps = []
    for trial in range(trials):
        cur_state = env.reset().reshape(1, 2)
        for step in range(trial_len):
            action = dqn_agent.act(cur_shape)
            env.render()
            new_state, reward, done, _ = env.step(action)
            
            reward = reward if not done else -20
            print(reward)
            new_state = new_state.reshape(1, 2)
            dqn_agent.remember(cur_state, action, reward, new_state, done)
            
            dqn_agent.replay()
            dqn_agent.target_train()
            
            cur_state = new_state
            if done:
                break
        if step >= 199:
            print("Failed to compile trial")
        else:
            print("Completed in {} trials".format(trial))
            break

if __name__ == "__main__":
    main()

TypeError: create_model() takes 0 positional arguments but 1 was given