# Example: Usage of RL for Stowage Planning

## Imports

First, various modules are imported (including agent classes, environment classes, a plotting unit and a logger).

In [None]:
import os
import sys
# Put the parent directory first on the module search path
# (presumably so that the project packages imported below can be resolved - confirm).
sys.path.insert(0, os.path.abspath('../'))
# Windows-style path to an "out" folder below the current working directory.
module_path = str(os.getcwd())+'\\out\\'

from env import roroDeck
from agent import sarsa, tdq, dqn
from analysis import *
from analysis.algorithms import *
from analysis.evaluator import Evaluator
from analysis.loggingUnit import LoggingBase


from agent.tdq import TDQLearning
from agent.sarsa import SARSA
from agent.dqn import DQLearningAgent

import pickle



from keras.models import Sequential, load_model
import numpy as np

## Load a model and show input data

#### Set path to saved prototype
See the comments for examples. The path depends on the location of the files.

In [None]:
# Start from the location where Training.ipynb saves its files.
path = LoggingBase(prepare_training=False).module_path

# Point to another location instead (e.g. submitted prototypes):
# split the Windows-style path at its backslashes and drop the last four parts.
path_parts = path.split('\\')
path = '\\'.join(path_parts[:-4])

print('Relative path is set to:\n' + path + '\n')

# Append the specific location of the prototype file. For example:
# NOTE(review): the file location is appended without inserting a separator -
# confirm that the shortened path is the intended prefix.
path += "20201118\\1927\\1927SARSA_L8-R12.p"
print('Will load prototype saved at:\n' + path)

#### Set environment according to input data

In [None]:
# A loading list is passed to the RoRo-deck environment through the argument
# vehicle_data, for example: roroDeck.RoRoDeck(vehicle_data=loading_list_1)
# NOTE(review): presumably every column describes one vehicle type and every
# row one attribute - confirm against the RoRoDeck documentation.

loading_list_1 = np.array([
    [0, 1,  2,  3, 4, 5, 6],
    [5, 5, -1, -1, 2, 2, 2],
    [1, 1,  0,  0, 1, 1, 1],
    [1, 2,  1,  2, 2, 1, 2],
    [2, 3,  2,  3, 2, 2, 3],
    [0, 0,  0,  0, 1, 0, 0],
])

loading_list_2 = np.array([
    [0, 1,  2,  3, 4],
    [5, 5, -1, -1, 2],
    [1, 1,  0,  0, 1],
    [1, 2,  1,  2, 2],
    [3, 4,  2,  3, 2],
    [0, 0,  0,  0, 1],
])


# The environment dimensions must fit the prototype that is loaded later.
# Neither loading list is passed here; only lanes and rows are set.
env = roroDeck.RoRoDeck(rows=12, lanes=8)

#### Create agent, bind environment and load model

In [None]:
# Create the agent and bind it to the environment.
# Activate exactly one of the following lines, matching the type of the saved prototype.
#agent = tdq.TDQLearning(env,path)
agent = sarsa.SARSA(env,path)
#agent = dqn.DQLearningAgent(env,path)

# Load the saved prototype from the specific path
agent.load_model(path)

# Continue with the environment held by the agent
# (presumably load_model restores the environment stored with the prototype - confirm).
env = agent.env
evaluator = Evaluator(env.vehicle_data, env.grid)

# Print the information stored with the model. The DQN agent exposes it as
# `info`; the other agents keep it in the "ModelParam" entry of their q_table.
# isinstance avoids constructing a second agent just to compare types.
if isinstance(agent, dqn.DQLearningAgent):
    print(agent.info)
else:
    model_param = agent.q_table["ModelParam"]
    for info in model_param:
        print(info +': '+ str(model_param[info]))

## Show best stowage plan the agent found

In [None]:
# Reset the environment and let the agent run on it.
env.reset()
agent.execute(env)

# Evaluate the resulting stowage plan, then show the deck and the evaluation.
stowage_plan = env.get_stowage_plan()
evaluation = evaluator.evaluate(stowage_plan)

env.render()
print(evaluation)

Show the loading sequence which constructed this stowage plan:

In [None]:
# Print the loading sequence recorded by the environment during the run above.
print(env.loading_sequence)

# Human Interface

#### Construct a stowage plan sequentially

Example of a human-agent interaction:
The agent shows the predictions for each action in a given state and recommends the best one.
If the environment is set to deterministic behaviour and every recommendation is followed, the stowage plan shown above is reconstructed.

Usage (Type the following on the Keyboard and press Enter):

0,1,2 &nbsp; &nbsp; &nbsp; &nbsp; *number from the list of possible actions*

r &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; *show the current state of the RORO-deck*

b &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;*follow the best action*

f &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;*stop the interaction and let the agent follow its own recommendations from here on*

q &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;*quit the execution*




In [None]:
# Interactive construction of a stowage plan.
#
# In every step the agent's prediction for each possible action is shown
# together with its recommended action. The user then types either
#   <number>  one of the listed possible actions
#   r         render the current state of the deck
#   b         follow the recommended (best) action
#   f         hand over to the agent and leave the loop
#   q         quit the loop
state = env.reset()
done = False
counter = 1
while not done:
    print("\n"+str(counter)+". Vehicle\nPossible Actions for Lane "+str(env.current_Lane)
          +"\n"+str(env.possible_actions))

    # One rounded prediction per possible action, in the same order as env.possible_actions
    predictions = [round(float(agent.predict(state, action)), 2)
                   for action in env.possible_actions]
    best_action = agent.max_action(state, env.possible_actions)

    print("Prediction of Agent:\n"+str(predictions))
    print("--> Choose: "+str(best_action))
    command = input().strip()

    # Only the number parsing is guarded. Errors raised by the environment or
    # the agent must surface instead of being swallowed by a blanket except.
    try:
        chosen_action = int(command)
    except ValueError:
        chosen_action = None

    if chosen_action is not None:
        if chosen_action in env.possible_actions:
            state, reward, done, info = env.step(chosen_action)
            counter += 1
        else:
            # Do not advance; the same state is shown again
            print("Action "+str(chosen_action)+" is not possible in this state")
    elif command == 'b':
        state, reward, done, info = env.step(int(best_action))
        counter += 1
    elif command == 'f':
        agent.execute(env)
        break
    elif command == 'q':
        print("Quit execution mode")
        break
    elif command == 'r':
        env.render()
    else:
        print("Unknown input - type an action number, r, b, f or q")

# Show the final deck, its evaluation and the loading sequence that produced it
env.render()
print("\n\n")
evaluation = evaluator.evaluate(env.get_stowage_plan())
print(evaluation)
print("\n\n")
print(env.loading_sequence)