# Example: Usage of RL for Stowage Planning

## Imports

First, various modules are imported (including agent classes, environment classes, a plotting unit and a logger).

In [1]:
import os
import sys
# Put the parent directory first on the module search path — presumably so the
# project packages imported below (env, agent, analysis) resolve; confirm
# against the repository layout.
sys.path.insert(0, os.path.abspath('../'))
# '<current working directory>\out\' built with Windows-style backslashes.
module_path = str(os.getcwd())+'\\out\\'

from env import roroDeck
from agent import sarsa, tdq, dqn
from analysis import *
from analysis.algorithms import *
from analysis.evaluator import Evaluator
from analysis.loggingUnit import LoggingBase


from agent.tdq import TDQLearning
from agent.sarsa import SARSA
from agent.dqn import DQLearningAgent

import pickle



from keras.models import Sequential, load_model
import numpy as np

Using TensorFlow backend.


## Load a model and show input data

#### Set path to saved prototype
See the comments for examples. The path will depend on the location of the files.

In [2]:
# Start from the location where Training.ipynb saves its files.
path = LoggingBase(prepare_training=False).module_path

# Move to another location (e.g. the submitted prototypes) by dropping the
# last four backslash-separated components of that path.
path_components = path.split('\\')
path = '\\'.join(path_components[:-4])

print('Relative path is set to:\n'+path+'\n')

# Append the specific location of the file. For example:
#path = path + "20201118\\1927\\1927SARSA_L8-R12.p"
path += "\\Masterarbeit\\output\\DQLearning\\L08-R12-L0\\DQLearning_L08-R12-L0.h5"
print('Will load prototype saved at:\n'+path)

Relative path is set to:
C:\Users\braun\Documents

Will load prototype saved at:
C:\Users\braun\Documents\Masterarbeit\output\DQLearning\L08-R12-L0\DQLearning_L08-R12-L0.h5


#### Set environment according to input data

In [3]:
# Pass a loading list by setting the RoRo-deck environment's vehicle_data.
# NOTE(review): the two example lists below are defined but not handed to
# RoRoDeck in this cell, so the environment is created without them —
# confirm how a loading list is meant to be passed.

# Example loading list with five columns (column ids 0-4 in the first row).
# Presumably one column per vehicle type; the meaning of the remaining rows
# is defined by RoRoDeck — TODO confirm.
loading_list_1 = np.array([[ 0,  1,  2,  3,  4],
                           [ 5,  5, -1, -1,  2],
                           [ 1,  1,  0,  0,  1],
                           [ 1,  2,  1,  2,  2],
                           [ 3,  4,  2,  3,  2],
                           [ 0,  0,  0,  0,  1]])

# Example loading list with seven columns (column ids 0-6 in the first row).
loading_list_2 = np.array([[ 0,  1,  2,  3,  4,  5,  6],
                           [ 5,  5, -1, -1,  2,  2,  2],
                           [ 1,  1,  0,  0,  1,  1,  1],
                           [ 1,  2,  1,  2,  2,  1,  2],
                           [ 2,  3,  2,  3,  2,  2,  3],
                           [ 0,  0,  0,  0,  1,  0,  0]])


# Environment dimensions must fit to the prototype
# (8 lanes x 12 rows, matching the "L08-R12" part of the prototype file name).
env = roroDeck.RoRoDeck(lanes=8, rows=12)

#### Create agent, bind environment and load model

In [4]:
# Create the agent matching the saved prototype and bind it to the environment.
# Alternatives for tabular prototypes:
#agent = tdq.TDQLearning(env,path)
#agent = sarsa.SARSA(env,path)
agent = dqn.DQLearningAgent(env,path)

# Load the saved prototype from the specific path
agent.load_model(path)

# Continue with the environment held by the agent and build the evaluator
# from that environment's vehicle data and grid.
env = agent.env
evaluator = Evaluator(env.vehicle_data, env.grid)

# Check the class directly instead of constructing a second DQLearningAgent
# only to compare types.
if isinstance(agent, dqn.DQLearningAgent):
    print(agent.info)
    # Keras' summary() prints the table itself and returns None, so wrapping
    # it in print() would emit an extra "None" line.
    agent.q_eval.summary()
else:
    # Other agents expose their parameters under q_table["ModelParam"].
    for info in agent.q_table["ModelParam"]:
        print(info +': '+ str(agent.q_table["ModelParam"][info]))

DQ-Agent:		 ALPHA: 0.0005 
			 GAMMA: 0.999
			 Replay Buffer Memory Size: 1000000
			 Model name: None
			 Epsilon Decrement: 0.9996
			 Batch Size: 32
			 Iterations: 12000
			 Pretraining End Episode: 10000
 Information on Q-Network
********************************************************************************
NN has 
 					2 layers with relu activation 
					0.001 L2-activity regularisation in each layer
					Adam-Optimiser with learning rate 0.0005 
					Mean Squared Error- loss function
********************************************************************************

 Information on target Q-Network (Identical with Q-Network)
********************************************************************************
NN has 
 					2 layers with relu activation 
					0.001 L2-activity regularisation in each layer
					Adam-Optimiser with learning rate 0.0005 
					Mean Squared Error- loss function
********************************************************************************

Model: "sequ

## Show best stowage plan the agent found

In [5]:
# Start from a freshly reset environment.
env.reset()
# Let the agent act on the environment (presumably until the episode ends —
# confirm against the agent's execute implementation).
agent.execute(env)
# Evaluate the stowage plan the environment now holds.
evaluation = evaluator.evaluate(env.get_stowage_plan())
# Show the deck, then the evaluation result.
env.render()
print(evaluation)

-----------Loading Sequence----------------------------------------------------------------
X	X	X	X	X	X	X	X	

X	X	X	1	2	X	X	X	

X	X	3	1	2	4	X	X	

X	5	3	6	2	4	7	X	

8	5	9	6	10	4	7	11	

8	12	9	13	10	14	7	11	

15	12	16	13	10	14	17	11	

15	18	16	19	20	21	17	22	

23	18	24	19	20	21	17	22	

23	25	24	26	27	28	29	30	

23	25	31	26	27	28	29	30	

-	-	31	-	-	-	-	-	

-----------VehicleType--------------------------------------------------------------------
X	X	X	X	X	X	X	X	

X	X	X	0	1	X	X	X	

X	X	0	0	1	1	X	X	

X	0	0	0	1	1	1	X	

4	0	0	0	1	1	1	1	

4	2	0	2	1	2	1	1	

4	2	2	2	1	2	3	1	

4	2	2	2	2	2	3	2	

3	2	2	2	2	2	3	2	

3	2	2	2	2	2	2	2	

3	2	2	2	2	2	2	2	

-	-	2	-	-	-	-	-	

-----------Destination--------------------------------------------------------------------
X	X	X	X	X	X	X	X	

X	X	X	1	2	X	X	X	

X	X	1	1	2	2	X	X	

X	1	1	1	2	2	2	X	

2	1	1	1	2	2	2	2	

2	1	1	1	2	1	2	2	

2	1	1	1	2	1	2	2	

2	1	1	1	1	1	2	1	

2	1	1	1	1	1	2	1	

2	1	1	1	1	1	1	1	

2	1	1	1	1	1	1	1	

-	-	1	-	-	-	-	-	



****************************

Show the loading sequence that produced this stowage plan.

In [6]:
# Print the environment's loading_sequence (the numbered list of placements).
print(env.loading_sequence)

Loading Sequence of RORO-Deck (Lanes: 8 Rows: 12)

1. Load Vehicle Type 	 0 	 in Lane: 	 3 	 Row: 	 1 
2. Load Vehicle Type 	 1 	 in Lane: 	 4 	 Row: 	 1 
3. Load Vehicle Type 	 0 	 in Lane: 	 2 	 Row: 	 2 
4. Load Vehicle Type 	 1 	 in Lane: 	 5 	 Row: 	 2 
5. Load Vehicle Type 	 0 	 in Lane: 	 1 	 Row: 	 3 
6. Load Vehicle Type 	 0 	 in Lane: 	 3 	 Row: 	 3 
7. Load Vehicle Type 	 1 	 in Lane: 	 6 	 Row: 	 3 
8. Load Vehicle Type 	 4 	 in Lane: 	 0 	 Row: 	 4 
9. Load Vehicle Type 	 0 	 in Lane: 	 2 	 Row: 	 4 
10. Load Vehicle Type 	 1 	 in Lane: 	 4 	 Row: 	 4 
11. Load Vehicle Type 	 1 	 in Lane: 	 7 	 Row: 	 4 
12. Load Vehicle Type 	 2 	 in Lane: 	 1 	 Row: 	 5 
13. Load Vehicle Type 	 2 	 in Lane: 	 3 	 Row: 	 5 
14. Load Vehicle Type 	 2 	 in Lane: 	 5 	 Row: 	 5 
15. Load Vehicle Type 	 4 	 in Lane: 	 0 	 Row: 	 6 
16. Load Vehicle Type 	 2 	 in Lane: 	 2 	 Row: 	 6 
17. Load Vehicle Type 	 3 	 in Lane: 	 6 	 Row: 	 6 
18. Load Vehicle Type 	 2 	 in Lane: 	 1 	 Row: 	 7 
19. 

# Human Interface

#### Construct a stowage plan sequentially

Example of a human-agent interaction:
The agent shows its predictions for each action in a given state and recommends the best one.
If the environment is set to deterministic behaviour, the stowage plan above is reconstructed when all recommendations are followed.

Usage (Type the following on the Keyboard and press Enter):

0,1,2 &nbsp; &nbsp; &nbsp; &nbsp; *number from the list of possible actions*

r &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; *show the current state of the RORO-deck*

b &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;*follow the best action*

f &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;*stop interaction by only following the recommendations of agent*

q &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;*quit the execution*




In [None]:
# Interactive stowage session.
#
# For every vehicle the agent's prediction for each possible action is shown
# together with its recommended action. The user then types one of:
#   <number>  take that action (must be one of the possible actions)
#   b         take the recommended (best) action
#   f         let the agent finish on its own and stop interacting
#   r         render the current state of the deck
#   q         quit the interaction
# Afterwards the deck, its evaluation and the loading sequence are printed.
state = env.reset()
done = False
counter = 1
while not done:
    print("\n"+str(counter)+". Vehicle\nPossible Actions for Lane "+str(env.current_Lane)
          +"\n"+str(env.possible_actions))
    # One prediction per currently possible action, rounded for display.
    source = [round(float(agent.predict(state, candidate)), 2)
              for candidate in env.possible_actions]
    best_action = agent.max_action(state, env.possible_actions)

    print("Prediction of Agent:\n"+str(source))
    print("--> Choose: "+str(best_action))
    command = input().strip()

    if command == 'b':
        action = int(best_action)
    elif command == 'f':
        agent.execute(env)
        break
    elif command == 'q':
        print("Quit execution mode")
        break
    elif command == 'r':
        env.render()
        continue
    else:
        # Only the number parsing is guarded: errors raised by the
        # environment and KeyboardInterrupt must not be swallowed.
        try:
            action = int(command)
        except ValueError:
            print("Unknown input "+repr(command)
                  +": enter an action number, r, b, f or q")
            continue
        if action not in env.possible_actions:
            print("Action "+str(action)+" is not possible here")
            continue

    state, reward, done, info = env.step(action)
    counter += 1

env.render()
print("\n\n")
evaluation = evaluator.evaluate(env.get_stowage_plan())
print(evaluation)
print("\n\n")
print(env.loading_sequence)


1. Vehicle
Possible Actions for Lane 3
[0 1 2 3]
Prediction of Agent:
[8.42, 3.43, 2.35, 2.06]
--> Choose: 0
