## Solving the CartPole problem using Deep SARSA
> 딥살사 알고리즘을 활용하여 카트폴 문제를 풀 수 있을까?

In [1]:
import sys, os
import gym
import pylab
import numpy as np
import random
import copy
import matplotlib.pyplot as plt
from IPython import display
%matplotlib inline

In [2]:
from gym import envs
# print(envs.registry.all())

In [114]:
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

# Path of the weights file that learn() saves to and that learn()/test() load from.
model_name='./save_model/deep_sarsa.h5'
# Path for a training-graph image; not referenced by the code in this section.
image_name='./save_graph/deep_sarsa.png'

class DeepSARSAgent:
    """Deep SARSA agent that approximates Q(s, a) with a small dense network.

    The network maps one observation (``state_dim`` floats) to one Q-value per
    action (``action_size`` outputs) and is trained one transition at a time.

    NOTE(review): ``get_action`` is purely greedy (no exploration). SARSA is
    normally run with an epsilon-greedy policy; consider adding exploration if
    training stalls.
    """

    def __init__(self, env):
        """Read the action/observation sizes from ``env`` and build the network.

        Args:
            env: Gym-style environment exposing ``action_space.n`` and
                ``observation_space.shape``.
        """
        self.env = env
        self.action_size = env.action_space.n
        self.state_dim = env.observation_space.shape[0]
        self.learning_rate = 0.001
        self.discount_factor = 0.999
        self.model = self.build_network()

    def build_network(self):
        """Build and compile the Q-network (three hidden ReLU layers of 24 units).

        Returns:
            The compiled Keras ``Sequential`` model.
        """
        model = Sequential()
        model.add(Dense(24, activation='relu', input_dim=self.state_dim))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        # Q-values are unbounded return estimates, so the output must be
        # linear. A softmax head squashes them into (0, 1) and forces them to
        # sum to 1, which makes the MSE regression targets unreachable.
        model.add(Dense(self.action_size, activation='linear'))
        # NOTE(review): newer Keras releases spell this argument
        # `learning_rate` -- confirm against the installed version.
        model.compile(loss="mse", optimizer=Adam(lr=self.learning_rate))
        return model

    def _as_row(self, state):
        """Return ``state`` as a float32 array of shape (1, state_dim)."""
        return np.reshape(np.float32(state), [1, self.state_dim])

    def get_action(self, state):
        """Return the index of the action with the highest predicted Q-value.

        Args:
            state: Observation holding ``state_dim`` values (any shape that
                reshapes to ``(1, state_dim)``).
        """
        q_values = self.model.predict(self._as_row(state))
        return np.argmax(q_values[0])

    def train_model(self, state, action, reward, next_state, next_action, done):
        """Fit the network on one SARSA transition.

        The regression target equals the network's current prediction for
        ``state`` with only the entry for ``action`` replaced by the SARSA
        target: ``reward`` if the episode ended, otherwise
        ``reward + discount_factor * Q(next_state, next_action)``.

        Args:
            state: Observation the action was taken in.
            action: Index of the action taken.
            reward: Reward received for the step.
            next_state: Observation after the step.
            next_action: Action the policy chose for ``next_state``.
            done: True when the step ended the episode.
        """
        state = self._as_row(state)

        # Start from Q(state, .) -- not Q(next_state, .) -- so the actions that
        # were not taken contribute zero error to the update.
        target = np.array(self.model.predict(state)[0])
        if done:
            target[action] = reward
        else:
            next_q = self.model.predict(self._as_row(next_state))[0]
            target[action] = reward + self.discount_factor * next_q[next_action]
        target = np.reshape(target, [1, self.action_size])

        self.model.fit(state, target, epochs=1, verbose=0)

    def has_model(self, filename):
        """Return True if ``filename`` exists on disk."""
        return os.path.exists(filename)

    def save_weights(self, filename):
        """Save the network weights to ``filename`` and print a confirmation."""
        self.model.save_weights(filename)
        print("model saved '{}'".format(filename))

    def load_weights(self, filename):
        """Load the network weights from ``filename`` and print a confirmation."""
        self.model.load_weights(filename)
        print("model loaded '{}'".format(filename))

    def reset_weights(self, filename):
        """Delete the weights file ``filename`` if it exists as a regular file."""
        if os.path.isfile(filename):
            os.remove(filename)
        
from sklearn.preprocessing import MinMaxScaler
# Module-level scaler used by discretize(), which refits it on every call.
scaler = MinMaxScaler()

def discretize(state):
    """Min-max scale one observation and return it as a single-row batch.

    Despite the name, no binning takes place. The observation's own values are
    used as the samples the module-level ``scaler`` is fitted on, so scaling
    is relative to the smallest and largest component of *this* observation,
    not to per-feature ranges.

    Args:
        state: Array-like observation; flattened before scaling.

    Returns:
        Array of shape ``(1, n)`` holding the scaled values, where ``n`` is
        the number of elements in ``state``.
    """
    # One column, one row per component: the scaler treats each component as
    # a sample of a single feature.
    column = np.asarray(state).reshape(-1, 1)
    scaled = scaler.fit_transform(column)
    return scaled.reshape(1, -1)

def test():
    """Run one recorded CartPole episode with the greedy policy.

    Loads saved weights from ``model_name`` when the file exists, wraps the
    environment in a ``Monitor`` that records into the ``'cartpole'``
    directory, and steps until the episode ends.

    Returns:
        Number of steps taken in the episode.
    """
    env = gym.make('CartPole-v0')
    wenv = None
    try:
        agent = DeepSARSAgent(env)
        if agent.has_model(model_name):
            agent.load_weights(model_name)

        wenv = gym.wrappers.Monitor(env, 'cartpole')
        t = 0
        done = False
        current_state = discretize(wenv.reset())
        while not done:
            wenv.render()
            t += 1
            action = agent.get_action(current_state)
            obs, reward, done, _ = wenv.step(action)
            current_state = discretize(obs)
        return t
    finally:
        # Always release the recorder/render window, even if loading weights
        # or stepping fails. Closing the wrapper is expected to close the
        # wrapped env as well; fall back to the bare env if wrapping failed.
        (wenv if wenv is not None else env).close()
        
def learn(EPISODES, RENDERED, RESET):
    """Train a DeepSARSAgent on CartPole-v0 and save its weights.

    Args:
        EPISODES: Number of episodes to run.
        RENDERED: When true, render the environment on every step.
        RESET: When true and a weights file exists at ``model_name``, delete
            it and start from fresh weights; otherwise load it.

    Every 100 episodes the running average score is printed. The weights are
    written to ``model_name`` after the last episode.
    """
    env = gym.make('CartPole-v0')
    try:
        agent = DeepSARSAgent(env)
        if agent.has_model(model_name):
            if RESET:
                agent.reset_weights(model_name)
            else:
                agent.load_weights(model_name)

        global_score = 0

        # Count episodes from 1 so that exactly EPISODES episodes run and `e`
        # equals the number of completed episodes when the average is taken.
        for e in range(1, EPISODES + 1):
            done = False
            score = 0
            state = discretize(env.reset())
            action = agent.get_action(state)

            while not done:
                # Render only after reset(), and once per step rather than
                # once per episode.
                if RENDERED:
                    env.render()

                next_state, reward, done, info = env.step(action)
                next_state = discretize(next_state)

                next_action = agent.get_action(next_state)
                agent.train_model(state, action, reward, next_state, next_action, done)

                score += reward
                state = next_state
                # SARSA is on-policy: the action used in the update target
                # must be the one actually executed on the next step.
                action = next_action

            global_score += score
            if e % 100 == 0:
                gavg = global_score / e
                print("gavg:{}, \t gscore:{}, epsd:{}".format(math.ceil(gavg), global_score, e))

        # Make sure the target directory exists so a finished training run is
        # not lost to a missing folder.
        save_dir = os.path.dirname(model_name)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)
        agent.save_weights(model_name)
    finally:
        env.close()

In [115]:
# Training run configuration for this cell.
EPISODES = 200    # episode count passed to learn()
RENDERED = False  # when True, learn() calls env.render()
RESET = True      # when True, learn() deletes an existing weights file instead of loading it
learn(EPISODES, RENDERED, RESET)

gavg:10, 	 gscore:952.0, epsd:100
gavg:10, 	 gscore:1892.0, epsd:200
model saved './save_model/deep_sarsa.h5'


In [94]:
import os
# Remove the './cartpole' directory left by a previous run; test() passes the
# directory name 'cartpole' to gym.wrappers.Monitor.
if os.path.isdir('./cartpole'):
    import shutil
    shutil.rmtree('./cartpole')
# Run test() and print the step count it returns.
t = test()
print(t)

KeyError: "Unable to open object (object 'dense_182' doesn't exist)"