# Example: Usage of RL for Stowage Planning

## Imports

First, the required modules are imported (including the agent classes, the environment classes, a plotting unit and a logger).

In [None]:
import os
import sys
import pickle
import numpy as np
from pathlib import Path

# Put the parent directory at the very front of the module search path
# (presumably so that the project packages imported below can be resolved).
sys.path[:0] = [os.path.abspath('../')]
# Plain string: the current working directory followed by the "/out" folder.
module_path = f"{os.getcwd()}{Path('/out/')}"

from env import roroDeck
from analysis import *
from analysis.algorithms import *
from analysis.evaluator import Evaluator
from analysis.loggingUnit import LoggingBase


from agent.tdq import TDQLearning
from agent.sarsa import SARSA
from agent.dqn import DQLearningAgent

## Load a model and show input data

#### Set paths to trained models
`LoggingBase(prepare_training = False).module_path` points to the location where RL systems are stored when using `Training.ipynb`. The actual sub-path and file name need to be added. They depend on the date and time of production, the algorithm type and the problem set.

The information needs to be given in the following form (without a leading slash, because the string is joined onto the base path):

`"YYYYMMDD/hhmm/hhmmAlgorithm_LX-RX.type"`

For example:

`"20201118/1927/1927SARSA_L8-R12.p"`
This RL system was produced on 18.11.2020 at 19:27 and was trained with SARSA for 8 lanes and 12 rows.
All trained models are in the folder:

    .../thesisSB/analysis/out

In [None]:
# Modify if own trained models should be tested.
# Relative path of a self-trained model; it is only used when
# `use_path_string_2` is False and is then joined onto
# `LoggingBase(prepare_training = False).module_path`.

path_string_1 = "20201118/1927/1927SARSA_L8-R14.p"

**Or** specify the path to trained prototypes. For example:

`"DQLearning/L08-R14-L0/DQLearning_L08-R14-L0.h5"`


**Note: It is assumed that the folder `output` is on the same level as the folder `thesisSB`.**

In [None]:
# Modify to test submitted prototypes.
# Relative path of a prototype inside the `output` folder; it is only used
# when `use_path_string_2` is True.
path_string_2 = "SARSA/L08-R14-L0/SARSA_L08-R14-L0.p"

# Alternative prototypes: uncomment one of these lines to override the value above.
#path_string_2 = "DQLearning/L08-R14-L0/DQLearning_L08-R14-L0.h5"
#path_string_2 = "QLearning/L08-R14-L0/TDQ_L08-R14-L0.p"


Choose whether `path_string_2` (submitted prototypes) or `path_string_1` (own models trained with `Training.ipynb`) is used.

In [None]:
# True  -> load the submitted prototype given by `path_string_2`
# False -> load the self-trained model given by `path_string_1`
use_path_string_2 = True

In [None]:
# Start from the logger's module path and derive the model file location:
#   * prototypes live in the `output` folder three levels above that path,
#   * self-trained models live directly below that path.
path = LoggingBase(prepare_training = False).module_path
path = (
    path.parents[2] / 'output/' / path_string_2
    if use_path_string_2
    else path / path_string_1
)

print(f'Will load model saved at:\n{path}')

## Set environment according to input data

The environment input data must match the trained model (number of lanes and rows, loading list).

In [None]:
# A loading list is handed to the RoRo-deck environment through the
# `vehicle_data` argument, for example:
#     roroDeck.RoRoDeck(vehicle_data=loading_list_1)

# Example loading list with seven columns.
loading_list_1 = np.array([
    [0, 1, 2, 3, 4, 5, 6],
    [5, 5, -1, -1, 2, 2, 2],
    [1, 1, 0, 0, 1, 1, 1],
    [1, 2, 1, 2, 2, 1, 2],
    [2, 3, 2, 3, 2, 2, 3],
    [0, 0, 0, 0, 1, 0, 0],
])

# Example loading list with five columns.
loading_list_2 = np.array([
    [0, 1, 2, 3, 4],
    [5, 5, -1, -1, 2],
    [1, 1, 0, 0, 1],
    [1, 2, 1, 2, 2],
    [3, 4, 2, 3, 2],
    [0, 0, 0, 0, 1],
])


# Environment dimensions must fit to the prototype:
# lanes=8 / rows=14 correspond to the "L08-R14" part of the default model path.
env = roroDeck.RoRoDeck(lanes=8, rows=14)

## Create agent, bind environment and load model

**Specify the algorithm type according to the trained model**

In [None]:
# Instantiate the agent class that matches the algorithm of the stored model.
# SARSA fits the default model path; for another algorithm, comment out the
# active line and uncomment the matching alternative.
#agent = TDQLearning(env,path)
agent = SARSA(env,path)
#agent = DQLearningAgent(env,path)

In [None]:
# Load the stored model into the agent and print its meta information.
# The existence check runs *before* the load so that a wrong path yields the
# hint below rather than an error from inside `load_model`.
if not path.exists():
    print('No such file. Check path!')
else:
    agent.load_model(path)

    if path.suffix == ".p":   # Pickle file: parameters are stored in the Q-table
        model_param = agent.q_table["ModelParam"]
        for info in model_param:
            print(info + ': ' + str(model_param[info]))
    else:
        print(agent.info)

# Continue with the environment bound to the agent and build an evaluator
# from that environment's vehicle data and grid.
env = agent.env
evaluator = Evaluator(env.vehicle_data, env.grid)

## Show best stowage plan the agent found

In [None]:
# Start from a freshly reset environment, let the agent run on it, then
# evaluate the stowage plan held by the environment afterwards.
# The order of these calls matters.
env.reset()
agent.execute(env)
evaluation = evaluator.evaluate(env.get_stowage_plan())
env.render()  # show the state of the RoRo-deck
print(evaluation)

Show the loading sequence which constructed this stowage plan 

In [None]:
# Print the environment's loading sequence for the stowage plan produced above.
print(env.loading_sequence)

# Human Interface

#### Construct a stowage plan sequentially

Example of a human-agent interaction:
The agent shows the predictions for each action in a given state and recommends the best one.
If the environment is set to deterministic behaviour the stowage plan above is reconstructed if all recommendations are obeyed

Usage (Type the following on the Keyboard and press Enter):

0,1,2 &nbsp; &nbsp; &nbsp; &nbsp; *number from the list of possible actions*

r &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; *show the current state of the RORO-deck*

b &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;*follow the best action*

e &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;*stop interaction by only following the recommendations of agent*

q &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;*quit the execution*




In [None]:
# Interactive construction of a stowage plan.
# In every step the agent's prediction for each possible action is shown
# together with its recommended action; the user then either enters an
# action number or one of the commands 'b', 'e', 'q', 'r'.
state = env.reset()
done = False
counter = 1  # 1-based number of the vehicle that is placed next
while not done:
    print("\n"+str(counter)+". Vehicle\nPossible Actions for Lane "+str(env.current_Lane)
          +"\n"+str(env.possible_actions))

    # Agent's prediction for every currently possible action (rounded for display).
    source = [round(float(agent.predict(state, action)), 2)
              for action in env.possible_actions]
    best_action = agent.max_action(state, env.possible_actions)

    print("Prediction of Agent:\n"+str(source))
    print("--> Choose: "+str(best_action))
    print("\n(or 'r' for render \t 'e' for execute \t 'b' for best action \t 'q' for quitting the program)")

    command = input().strip()
    if command == 'b':
        # Follow the agent's recommendation for this step.
        state, reward, done, info = env.step(int(best_action))
        counter += 1
    elif command == 'e':
        # Hand control to the agent and leave the interaction.
        agent.execute(env)
        break
    elif command == 'q':
        print("Quit interaction mode")
        break
    elif command == 'r':
        env.render()
    else:
        # Only the conversion is guarded: errors raised by the environment
        # itself must surface instead of being silently swallowed.
        try:
            action = int(command)
        except ValueError:
            continue  # neither a command nor a number: ask again
        if action in env.possible_actions:
            state, reward, done, info = env.step(action)
            counter += 1

# Final report: deck, evaluation of the stowage plan and the loading sequence.
env.render()
print("\n\n")
evaluation = evaluator.evaluate(env.get_stowage_plan())
print(evaluation)
print("\n\n")
print(env.loading_sequence)