# Example Stowage Planning with RL

## Imports

First, the required modules are imported (agent classes, environment classes, a plotting unit and a logger).

In [1]:
import os
import sys

# Make the parent directory importable so the project packages
# (env, agent, analysis) can be resolved from this notebook.
sys.path.insert(0, os.path.abspath('../'))

# Default output directory below the current working directory.
# os.path.join with a trailing '' keeps the closing separator and uses the
# separator of the running OS instead of hard-coded backslashes.
module_path = os.path.join(os.getcwd(), 'out', '')

from env import roroDeck
from agent import sarsa, tdq, dqn
from analysis import *
from analysis.algorithms import *
from analysis.evaluator import Evaluator
from analysis.loggingUnit import LoggingBase


from agent.tdq import TDQLearning
from agent.sarsa import SARSA
from agent.dqn import DQLearningAgent

import pickle



from keras.models import Sequential, load_model
import numpy as np

Using TensorFlow backend.


In [3]:
# Set relative path
# Take the output directory from the logging unit so that all following cells
# resolve stored models relative to the same folder.
# NOTE(review): prepare_training=False presumably avoids setting up a new
# training run -- confirm against LoggingBase.
logging_base = LoggingBase(prepare_training=False)
module_path = logging_base.module_path
print('Path is set to:\n' + module_path)

Path is set to:
C:\Users\braun\Documents\Masterarbeit\analysis\out\


## Load a model and show input data

In [6]:
# Create an environment and an agent, then restore a stored model into it.
# NOTE(review): the agent class is TDQLearning while the file name and the
# printed "Algorithm" entry say SARSA -- confirm this pairing is intended.
env = roroDeck.RoRoDeck()
agent = tdq.TDQLearning(env, module_path)
agent.load_model(module_path + "20201118\\1927\\1927SARSA_L8-R12.p")

# Continue with the environment held by the agent after loading and build an
# evaluator from its vehicle data and grid.
env = agent.env
evaluator = Evaluator(env.vehicle_data, env.grid)

# List every entry stored under "ModelParam" as "<name>: <value>".
for name, value in agent.q_table["ModelParam"].items():
    print(name + ': ' + str(value))


Algorithm: SARSA
GAMMA: 0.999
ALPHA: 0.1
Episodes: 6000
EnvLanes:: 8
EnvRows: 12
VehicleData: [[ 0  1  2  3  4]
 [ 5  5 -1 -1  2]
 [ 1  1  0  0  1]
 [ 1  2  1  2  2]
 [ 2  3  2  3  2]
 [ 0  0  0  0  1]]
TrainingTime: 27.007267475128174


In [None]:
# Alternative to the cell above: load a different stored model.
# NOTE(review): the agent class is SARSA while the file name says TDQ --
# confirm this pairing is intended.
env = roroDeck.RoRoDeck()
agent = sarsa.SARSA(env,module_path)

agent.load_model(module_path+"20200911\\1307\\1307TDQ_L8_R12_Rf1_A5.p")

# Continue with the environment held by the agent after loading.
env = agent.env
# Use the Evaluator class imported at the top of the notebook; the previous
# `evm.` prefix referred to a name that is not imported anywhere here.
evaluator = Evaluator(env.vehicle_data, env.grid)


# Print every entry stored under "ModelParam" as "<name>: <value>".
for info in agent.q_table["ModelParam"]:
    print(info +': '+ str(agent.q_table["ModelParam"][info]))



In [None]:
#env = roroDeck.RoRoDeck(False,8,10)
#evaluator = evm.Evaluator(env.vehicle_data, env.grid)
#DQN_agent = DQNAgent(gamma=0.999, epsilon=1.0, alpha=0.0004, input_dims=np.shape(env.reset())[0], n_actions=5, mem_size=10000000, batch_size=32, epsilon_end=0.01, epsilon_dec= 0.999992)
#DQN_agent.load_model(module_path+"20200428\\1045\\104520200427stochasticdqn_model.h5")


## Show the best stowage plan the agent found

In [None]:
# Reset the environment and let the loaded agent run on it, then evaluate and
# display the resulting stowage plan.
env.reset()
# Commented-out alternative for a DQN agent (kept for reference):
#print(np.shape(env.current_state))
#DQN_agent.execute(env)
agent.execute(env)
# Evaluate the stowage plan currently held by the environment.
evaluation = evaluator.evaluate(env.get_stowage_plan())
# Render the environment first, then print the evaluation result below it.
env.render()
print(evaluation)

Show the loading sequence that produced this stowage plan.

In [None]:
# Print the loading sequence stored on the environment after the run above.
print(env.loading_sequence)

## Construct a stowage plan sequentially

Example of a human-agent interaction:
The agent shows its prediction for each possible action in the current state and recommends the best one.
If the environment is set to deterministic behaviour and every recommendation is followed, the stowage plan shown above is reconstructed.

Usage (Type the following on the Keyboard and press Enter):

0,1,2 &nbsp; &nbsp; &nbsp; &nbsp; *number from the list of possible actions*

r &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; *show the current state of the RORO-deck*

b &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;*follow the best action*

f &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;*stop the interaction and let the agent continue on its own, following only its recommendations*

q &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;*quit the execution*




In [None]:
# Interactive construction of a stowage plan.
# For every step the agent's prediction for each possible action is shown
# together with its recommended action; the user then types one of:
#   <number>  take that action (must be one of the possible actions)
#   b         take the recommended action
#   f         hand over to the agent (agent.execute) and leave the loop
#   q         leave the loop
#   r         render the current deck and ask again
state = env.reset()
done = False
counter = 1
while not done:
    print("\n"+str(counter)+". Vehicle\nPossible Actions for Lane "+str(env.current_Lane)
          +"\n"+str(env.possible_actions))
    # One rounded prediction per possible action, in the order of env.possible_actions.
    source = [round(float(agent.predict(state, candidate)), 2)
              for candidate in env.possible_actions]
    best_action = agent.max_action(state, env.possible_actions)

    # Commented-out alternative for a DQN agent (kept for reference):
    #source = DQN_agent.q_eval.predict(state[np.newaxis, :])
    #best_action = DQN_agent.maxAction(state,env.possible_actions)
    #print("Prediction of Agent:\n"+str(source[0][env.possible_actions]))
    print("Prediction of Agent:\n"+str(source))
    print("--> Choose: "+str(best_action))

    # Surrounding whitespace is ignored so that e.g. "b " is still understood.
    command = input().strip()

    if command == 'b':
        action = int(best_action)
    elif command == 'f':
        #DQN_agent.execute(env)
        agent.execute(env)
        break
    elif command == 'q':
        print("Quit execution mode")
        break
    elif command == 'r':
        env.render()
        continue
    else:
        # Only the number conversion is guarded. The former bare `except:`
        # also wrapped env.step and swallowed every error, including
        # KeyboardInterrupt, which made the cell hard to stop.
        try:
            action = int(command)
        except ValueError:
            print("Unknown input '"+command+"' - enter a possible action, b, f, r or q")
            continue
        if action not in env.possible_actions:
            print("Action "+str(action)+" is not possible in this state")
            continue

    state, reward, done, info = env.step(action)
    counter += 1

# Final report: rendered deck, evaluation of the plan and the loading sequence.
env.render()
print("\n\n")
evaluation = evaluator.evaluate(env.get_stowage_plan())
print(evaluation)
print("\n\n")
print(env.loading_sequence)