In [1]:
# Standard library; the parent directory is put first on sys.path so the
# project packages imported below can be resolved from this notebook's folder.
import os
import sys
sys.path.insert(0, os.path.abspath('../'))
# Output/model directory "<cwd>\out\" (trailing separator included because
# later cells append file names by plain string concatenation).
# NOTE(review): the hard-coded backslashes make this path Windows-specific.
module_path = str(os.getcwd())+'\\out\\'

# Project-local modules: environment, agents and the stowage plan evaluator.
from env import roroDeck
from agent import sarsa, tdq
# NOTE(review): wildcard import - the names it brings in are not visible here.
from analysis import *
from algorithms import Algorithms
from valuation import evaluator as evm
#from valuation import evaluation
#from viz import *

# Third-party numerics and plotting.
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_style("whitegrid")
from datetime import datetime


# Global plot styling. The rcParams overrides are applied after sns.set(),
# so they are the values in effect once this cell has run.
sns.set(style="whitegrid")
#sns.set(font_scale=1, rc={'text.usetex' : True})
plt.rcParams["font.family"] = "Times New Roman"
plt.rcParams.update({'font.size': 11})
plt.rcParams.update({'text.color' : "black",
                     'axes.labelcolor' : "black"})
# NOTE(review): no figure has been created in this cell before this call -
# confirm that it is needed here.
plt.tight_layout()

In [2]:
# Upper bound on the number of random episodes that may be sampled.
time_out = 50_000
# Number of unique stowage plan evaluations the reference set should contain.
n_evaluations = 400

Construct a set with **400** (`n_evaluations`) unique stowage plan evaluations.
These evaluations are based on randomly generated stowage plans by the RORO-deck environment.

If the evaluation of a randomly generated stowage plan is equivalent to another evaluation within the set, this stowage plan is discarded.
If this size is not reached within **50_000** (`time_out`) iterations, then the procedure is stopped.

The next step loads a model and prints the parameters with which this model was generated.

*Note: TD-Q-Learning and SARSA agents have an equivalent execution method; therefore, it does not matter which agent is initialised.*

In [3]:
# Environment (8 lanes x 10 rows, stochastic) and the evaluator built from
# the environment's vehicle data and grid.
env = roroDeck.RoRoDeck(True, lanes=8, rows=10, stochastic = True)
evaluator = evm.Evaluator(env.vehicle_data, env.grid)

# Either agent class could be used here; the TD-Q-Learning agent is selected.
#agent = sarsa.SARSA(env, module_path)
agent = tdq.TDQLearning(env, module_path)

# Load the stored model from the output directory.
model_file = module_path+"QLearning_ABGABE_20200911\\1143\\1143TDQ_L8_R10_Rf1_A5.p"
agent.load_model(model_file)

# Print every stored model parameter as "<name><tab><tab><value>".
model_params = agent.q_table["ModelParam"]
for info in model_params:
    print('\t\t'.join((str(info), str(model_params[info]))))


Algorithm		Time Difference Q-Learning
GAMMA		0.999
ALPHA		0.1
Episodes		1000000
EnvLanes:		8
EnvRows		10
VehicleData		[[ 0  1  2  3  4]
 [ 5  5 -1 -1  2]
 [ 1  1  0  0  1]
 [ 1  2  1  2  2]
 [ 2  3  2  3  2]
 [ 0  0  0  0  1]]
TrainingTime		4603.0916357040405


In [4]:
def get_sorted_random_stowage_plans():
    """Collect unique evaluations of randomly generated stowage plans.

    Each episode resets the notebook-level ``env``, applies sampled actions
    until the environment reports ``done`` and evaluates the resulting stowage
    plan with the notebook-level ``evaluator``. Evaluations are stored in a
    set, so an evaluation equal to one already collected is discarded.

    Sampling stops as soon as ``n_evaluations`` unique evaluations exist or
    after ``time_out`` episodes, whichever happens first. Progress is printed
    every 500 episodes.

    Returns:
        list: the unique evaluations sorted in ascending order (using the
        ordering defined by the evaluation objects themselves).
    """
    unique_evaluations = set()
    episodes = 0
    while len(unique_evaluations) < n_evaluations and episodes < time_out:
        env.reset()
        done = False
        while not done:
            # Only the termination flag of the step result is needed here.
            _, _, done, _ = env.step(env.action_space_sample())
        unique_evaluations.add(evaluator.evaluate(env.get_stowage_plan()))
        episodes += 1
        if episodes % 500 == 0:
            print(str(episodes)
                  + ' of {}\t unique stowage plan evaluations:\t'.format(time_out)
                  + str(len(unique_evaluations)))

    # Warn only on a real shortfall. Testing the episode counter instead would
    # also warn when the target is reached exactly on the last allowed episode.
    if len(unique_evaluations) < n_evaluations:
        print('\n\nWARNING:\tCould not construct {} evaluations.'.format(n_evaluations))
        print('\t\tActual number is {}'.format(len(unique_evaluations)))

    return sorted(unique_evaluations)

In [5]:
def valuate_model(env_local, plans, n=10, info=False):
    """Rank the agent's stowage plans against a reference set of evaluations.

    For each of ``n`` runs ``env_local`` is reset, the notebook-level ``agent``
    is executed and the plan read from ``env_local`` is evaluated with the
    notebook-level ``evaluator``. The evaluation is merged into a
    de-duplicated, ascending copy of ``plans``. If it was new, the lowest
    entry is dropped so the ranked list keeps the size of ``plans``. The
    performance of a run is the 1-based position of the evaluation in that
    list divided by the list length (1.0 is the best possible value).

    If the agent's evaluation is itself the dropped lowest entry, it ranks
    below every reference plan and is recorded as 0.0, so such runs are not
    silently left out of the statistics.

    Args:
        env_local: environment that is reset and whose stowage plan is read.
        plans: list of reference evaluations; it is not modified.
        n: number of agent executions.
        info: if True, print position and performance for every run.

    Returns:
        numpy.ndarray: one performance value per run (empty if ``plans`` is
        empty, because there is nothing to rank against).
    """
    performance = []
    for _ in range(n):
        env_local.reset()
        # NOTE(review): the agent acts on the environment it holds itself;
        # this assumes that environment is `env_local` - TODO confirm.
        agent.execute()
        evaluation = evaluator.evaluate(env_local.get_stowage_plan())

        # dict.fromkeys drops duplicates while keeping first occurrences.
        ranked = list(dict.fromkeys(list(plans) + [evaluation]))
        ranked.sort()

        # A new evaluation enlarged the list; drop the lowest entry so the
        # denominator stays equal to the size of the reference set.
        if len(ranked) == len(plans) + 1:
            ranked = ranked[1:]

        if not ranked:
            continue

        # At which position is the stowage plan of the agent (maximal
        # performance is 100%)? Position 0 means it was the dropped entry.
        position = 0
        for idx, candidate in enumerate(ranked):
            if candidate == evaluation:
                position = idx + 1
                break

        if info:
            print(str(position)+". Position of "+str(len(ranked))+
                  "\t Performance of model: "+str(position/(len(ranked))))
        performance.append(position/(len(ranked)))

    return np.array(performance)

## Create Random Stowage Plans

In [6]:
# Build the sorted reference list of unique random stowage plan evaluations.
random_plans = get_sorted_random_stowage_plans()

500 of 50000	 unique stowage plan evaluations:	200
1000 of 50000	 unique stowage plan evaluations:	256
1500 of 50000	 unique stowage plan evaluations:	306
2000 of 50000	 unique stowage plan evaluations:	327
2500 of 50000	 unique stowage plan evaluations:	352
3000 of 50000	 unique stowage plan evaluations:	382


In [7]:
# Rank the agent against the random reference plans with env.p = 0.99
# (the "99%" setting of the printout), using 100 executions.
env.p = 0.99
performance99 = valuate_model(env, random_plans, info=False, n=100)
rank_mean_99 = performance99.mean()
rank_std_99 = performance99.std()
print('Rank at 99%:\nMean:\t', rank_mean_99, '\nStd.:\t', rank_std_99)

Rank at 99%:
Mean:	 0.9950249999999999 
Std.:	 0.00043229041164476797


In [8]:
# Same evaluation with env.p = 0.95 (the "95%" setting of the printout).
env.p = 0.95
performance95 = valuate_model(env, random_plans, info=False, n=100)
rank_mean_95 = performance95.mean()
rank_std_95 = performance95.std()
print('Rank at 95%:\nMean:\t', rank_mean_95, '\nStd.:\t', rank_std_95)

Rank at 95%:
Mean:	 0.96985 
Std.:	 0.07424942760722132


In [9]:
# Worst relative rank observed in each of the two evaluations above.
lowest_99 = performance99.min()
lowest_95 = performance95.min()
print('Lowest Rank\nat 99%:\t', lowest_99, '\nat 95%:\t', lowest_95)

Lowest Rank
at 99%:	 0.9925 
at 95%:	 0.69


In [10]:
from scipy.stats import ranksums
from scipy.stats import mannwhitneyu, kruskal

In [11]:
# 40 evenly spaced env.p values, descending from 0.6 to 0 (both included).
a = np.linspace(start=0.6, stop=0, num=40)

In [12]:
# Sweep env.p downwards over the grid `a` and, for each value, compare the
# ranks of 100 agent evaluations with the ranks of the random reference
# evaluations. The sweep stops at the first value whose two-sided
# Mann-Whitney U p-value exceeds 0.01.
# NOTE(review): the name `random` would shadow the standard-library module of
# the same name if that module is in scope - confirm this is intended.
random = random_plans.copy()
for ix, temperature in enumerate(a):
    print('Process {}%\t\tCurrent env.p={}'.format((round(ix*100/len(a),4)),(round(temperature,4))))

    env.stochastic= True
    env.p = temperature
    best_sp = []

    # Collect 100 agent evaluations that do not occur in the random set.
    # Repeated agent evaluations are kept; only matches with `random` are skipped.
    # NOTE(review): the loop has no iteration cap - it does not end if the
    # agent only produces evaluations that are already in `random`.
    while len(best_sp)<100:
        env.reset()
        agent.execute()
        evaluation = evaluator.evaluate(env.get_stowage_plan())
        if evaluation not in random:
            best_sp += [evaluation]
            
    # Joint ascending ordering; the rank of an entry is its index in it.
    dummy = random + best_sp
    dummy.sort()
    
    random_ranks = []
    agent_ranks = []
    # This loop rebinds `ix`; the outer `ix` has already been used by the
    # progress print above and is reassigned by enumerate on the next pass.
    for ix, i in enumerate(dummy):
        if i in best_sp:
            agent_ranks+=[ix]
        else:
            random_ranks+=[ix]
        
    agent_ranks = np.array(agent_ranks)
    random_ranks = np.array(random_ranks)
    
    _,p = mannwhitneyu(random_ranks, agent_ranks, alternative='two-sided')
    print('\t\t\tMann-Whitney-U p_value:',str(round(p,9)))
    if p > 0.01:
        print('critical p identified')
        break
    
# Report the tests for the last env.p value processed (the one that triggered
# the break, or the final grid value if the loop ran to completion).
_,p = mannwhitneyu(random_ranks, agent_ranks, alternative='two-sided')
print(temperature)
print(mannwhitneyu(random_ranks, agent_ranks, alternative='two-sided'))
print(ranksums(random_ranks,agent_ranks))
print(kruskal(random_ranks, agent_ranks))


# Plot the rank distributions of the last processed env.p value: a kernel
# density estimate on top and a strip plot of the individual ranks below,
# then save the figure as PDF into the output directory.
module_path = str(os.getcwd())+'\\out\\'
os.makedirs(module_path, exist_ok=True)


sns.set(style="whitegrid")
#sns.set(font_scale=1, rc={'text.usetex' : True})
plt.rcParams["font.family"] = "Times New Roman"
plt.rcParams.update({'font.size': 11})
plt.rcParams.update({'text.color' : "black",
                     'axes.labelcolor' : "black"})
# NOTE(review): called before the figure below exists - confirm it is needed.
plt.tight_layout()


#fig, axs = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(2.6, 4))
# Kernel bandwidth; also used as the x-axis margin further down.
bw = 50

fig = plt.figure(figsize=(5.1, 2.2))
#fig = plt.figure(figsize=(7.9, 3.3))

fig.tight_layout()
# 4x3 grid: the density plot takes the upper three rows, the strip plot the
# last row; both share the x-axis.
gs = fig.add_gridspec(4, 3)

fi_ax1 = fig.add_subplot(gs[0:3, :])
fi_ax2 = fig.add_subplot(gs[3, :],sharex=fi_ax1)


# Tick labels only on the lower axes.
plt.setp(fi_ax1.get_xticklabels(), visible=False)
plt.setp(fi_ax2.get_xticklabels(), visible=True)


# NOTE(review): the `bw` and `kernel` keyword arguments are presumably only
# accepted by older seaborn releases - verify against the installed version.
ax = sns.kdeplot(np.array(agent_ranks), bw=bw, clip=[0,len(dummy)],kernel='epa', ax=fi_ax1,label='Agent')
ax = sns.kdeplot(np.array(random_ranks),ax=ax , bw=bw, clip=[0,len(dummy)],kernel='epa',label='Random')

# Shrink the axes to 80% width so the legend fits to the right of it.
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])

ax.legend(loc='center left', bbox_to_anchor=(1, box.y0 +0.5* box.height), fontsize=11*(1/0.9))

ax = sns.stripplot(np.array(agent_ranks),alpha=0.5, ax = fi_ax2,label='Agent')
ax = sns.stripplot(np.array(random_ranks),alpha=0.35, ax = ax, color=sns.color_palette("deep")[1],label='Random')
ax.set_xlim(-bw, len(dummy)+bw)

# Same width reduction for the lower axes so both panels stay aligned.
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])

# Put a legend below current axis
#leg = ax.legend(loc='upper left', bbox_to_anchor=(-0.013, -0.6),
#          fancybox=True, shadow=False, ncol=1, fontsize=11*(1/0.9))
#for lh in leg.legendHandles: 
#    lh.set_alpha(1)

    
fi_ax1.set(ylabel='Density')
fi_ax2.set(xlabel='Ranks')
#fi_ax2.xaxis.set_label_coords(0.96, -0.72)
#plt.suptitle('Rank Comparison of stowage plans',x=0.5*0.9)
#plt.tight_layout()


# The file name carries the env.p value of the last processed sweep step.
plt.savefig(module_path + '\\denisity_p'+str(round(temperature,4))+'prozent_bw50.pdf', dpi=600, bbox_inches="tight")

Process 0.0%		Current env.p=0.6
			Mann-Whitney-U p_value: 0.0
Process 2.5%		Current env.p=0.5846
			Mann-Whitney-U p_value: 0.0
Process 5.0%		Current env.p=0.5692
			Mann-Whitney-U p_value: 0.0
Process 7.5%		Current env.p=0.5538
			Mann-Whitney-U p_value: 0.0
Process 10.0%		Current env.p=0.5385
			Mann-Whitney-U p_value: 0.0
Process 12.5%		Current env.p=0.5231
			Mann-Whitney-U p_value: 0.0
Process 15.0%		Current env.p=0.5077
			Mann-Whitney-U p_value: 0.0
Process 17.5%		Current env.p=0.4923
			Mann-Whitney-U p_value: 0.0
Process 20.0%		Current env.p=0.4769
			Mann-Whitney-U p_value: 0.0
Process 22.5%		Current env.p=0.4615
			Mann-Whitney-U p_value: 0.0
Process 25.0%		Current env.p=0.4462


KeyboardInterrupt: 

In [None]:
# Mean and standard deviation of the agent's relative rank for 21 evenly
# spaced env.p values between 0 and 1 (100 executions each); the means are
# plotted against env.p and the elapsed wall-clock time is printed.
time1 = datetime.now()
x = np.linspace(0, 1, 21)
means, std = [], []
for ix, i in enumerate(x):
    progress = round(ix*100/len(x), 4)
    print('Process {}%'.format(progress))
    env.p = i
    performance = valuate_model(env, random_plans, n=100)
    means.append(performance.mean())
    std.append(performance.std())

plt.plot(x, means)
plt.show()
elapsed = datetime.now() - time1
print('This took:', elapsed, 's')

In [None]:
# Collect 100 agent evaluations at env.p = 0.0 that are not part of the
# random reference list `random`.
env.stochastic = True
env.p = 0.0

best_sp = []
while True:
    if len(best_sp) >= 100:
        break
    env.reset()
    agent.execute()
    evaluation = evaluator.evaluate(env.get_stowage_plan())
    if evaluation in random:
        # Already present in the reference list: skip it.
        continue
    best_sp.append(evaluation)

In [None]:
# Joint ascending ordering of the random and the agent evaluations.
dummy = sorted(random + best_sp)

In [None]:
# Split the indices of the joint ordering into agent ranks and random ranks.
agent_ranks, random_ranks = [], []
for ix, i in enumerate(dummy):
    target = agent_ranks if i in best_sp else random_ranks
    target.append(ix)

random_ranks = np.array(random_ranks)
agent_ranks = np.array(agent_ranks)

In [None]:
# Two-sided Mann-Whitney U test on the two rank samples, followed by the
# Wilcoxon rank-sum and Kruskal-Wallis tests for comparison.
mwu_result = mannwhitneyu(random_ranks, agent_ranks, alternative='two-sided')
_, p = mwu_result
print(p)
print(mwu_result)
ranksum_result = ranksums(random_ranks, agent_ranks)
print(ranksum_result)
kruskal_result = kruskal(random_ranks, agent_ranks)
print(kruskal_result)

In [None]:
# Plot the rank distributions held in `agent_ranks` / `random_ranks`: kernel
# density estimate on top, strip plot of the individual ranks below, saved as
# PDF into a dated plots directory.
# NOTE(review): this rebinds the notebook-wide `module_path` to a plots
# sub-directory without a trailing separator.
module_path = str(os.getcwd())+'\\out\\plots11092020'
os.makedirs(module_path, exist_ok=True)


sns.set(style="whitegrid")
#sns.set(font_scale=1, rc={'text.usetex' : True})
plt.rcParams["font.family"] = "Times New Roman"
plt.rcParams.update({'font.size': 11})
plt.rcParams.update({'text.color' : "black",
                     'axes.labelcolor' : "black"})
# NOTE(review): called before the figure below exists - confirm it is needed.
plt.tight_layout()


#fig, axs = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(2.6, 4))
# Kernel bandwidth; also used as the x-axis margin further down.
bw = 50

fig = plt.figure(figsize=(5.1, 2.2))
#fig = plt.figure(figsize=(7.9, 3.3))

fig.tight_layout()
# 4x3 grid: density plot in the upper three rows, strip plot in the last row.
gs = fig.add_gridspec(4, 3)

fi_ax1 = fig.add_subplot(gs[0:3, :])
fi_ax2 = fig.add_subplot(gs[3, :],sharex=fi_ax1)


# Tick labels only on the lower axes.
plt.setp(fi_ax1.get_xticklabels(), visible=False)
plt.setp(fi_ax2.get_xticklabels(), visible=True)


# NOTE(review): the `bw` and `kernel` keyword arguments are presumably only
# accepted by older seaborn releases - verify against the installed version.
ax = sns.kdeplot(np.array(agent_ranks), bw=bw, clip=[0,len(dummy)],kernel='epa', ax=fi_ax1,label='Agent')
ax = sns.kdeplot(np.array(random_ranks),ax=ax , bw=bw, clip=[0,len(dummy)],kernel='epa',label='Random')

# Shrink the axes to 80% width so the legend fits to the right of it.
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])

ax.legend(loc='center left', bbox_to_anchor=(1, box.y0 +0.5* box.height), fontsize=11*(1/0.9))

ax = sns.stripplot(np.array(agent_ranks),alpha=0.5, ax = fi_ax2,label='Agent')
ax = sns.stripplot(np.array(random_ranks),alpha=0.35, ax = ax, color=sns.color_palette("deep")[1],label='Random')
ax.set_xlim(-bw, len(dummy)+bw)

# Same width reduction for the lower axes so both panels stay aligned.
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])

# Put a legend below current axis
#leg = ax.legend(loc='upper left', bbox_to_anchor=(-0.013, -0.6),
#          fancybox=True, shadow=False, ncol=1, fontsize=11*(1/0.9))
#for lh in leg.legendHandles: 
#    lh.set_alpha(1)

    
fi_ax1.set(ylabel='Density')
fi_ax2.set(xlabel='Ranks')
#fi_ax2.xaxis.set_label_coords(0.96, -0.72)
#plt.suptitle('Rank Comparison of stowage plans',x=0.5*0.9)
#plt.tight_layout()


# NOTE(review): the file name is hard-coded to "p60prozent" and does not
# follow the env.p value the plotted ranks were produced with - confirm.
plt.savefig(module_path + '\\denisity_p60prozent_bw50.pdf', dpi=600, bbox_inches="tight")

In [None]:
# Load the stored SARSA model and print its parameters.
# The model directory is built explicitly instead of reusing `module_path`:
# an earlier plotting cell rebinds `module_path` to a plots sub-directory
# without a trailing separator, so appending the model file name to it would
# produce a wrong path.
model_dir = os.path.join(os.getcwd(), 'out', '')
agent = sarsa.SARSA(env, model_dir)
# NOTE(review): the file name suggests a 10-lane / 14-row model - confirm it
# matches the environment used in this notebook.
agent.load_model(os.path.join(model_dir, "20200729", "1827", "1827SARSA_L10_R14_Rf1_A5.p"))

# Print every stored model parameter as "<name><tab><tab><value>".
for info in agent.q_table["ModelParam"]:
    print(str(info)+'\t\t'+str(agent.q_table["ModelParam"][info]))

In [None]:
# Mean relative rank of the loaded agent for 15 env.p values between 0.5 and
# 1 (10 executions each), plotted against env.p.
# Fixes: the attribute name was misspelled (which only created a new, unused
# attribute), the reference list is `random_plans` (the name used before is
# never defined in this notebook), and the plot now shows the values computed
# here (`means_2`) instead of `means` from an earlier cell, whose length does
# not match `x`.
env.stochastic = True

time1 = datetime.now()
means_2 = []
x = np.linspace(0.5, 1, 15)
for i in x:
    env.p = i
    means_2.append(valuate_model(env, random_plans, n=10).mean())

plt.plot(x, means_2)
plt.show()
print('This took:',datetime.now()-time1,'s')

In [None]:
# Execute the agent once at env.p = 0.8 and merge its evaluation into the
# sorted reference evaluations.
# Fix: the reference list is `random_plans`; the name used before is never
# defined in this notebook.
env.stochastic= True
env.p = .8
env.reset()
agent.execute()
#print(env.loading_sequence)
evaluation = evaluator.evaluate(env.get_stowage_plan())
print(evaluation)
stowage_Plans_XX = random_plans.copy()
stowage_Plans_XX += [evaluation]

#remove duplicates
stowage_Plans_XX = list(dict.fromkeys(stowage_Plans_XX))
stowage_Plans_XX.sort()

# A new evaluation enlarged the list; drop the lowest entry so the list keeps
# the size of the reference set.
if len(stowage_Plans_XX) == len(random_plans)+1:
    stowage_Plans_XX = stowage_Plans_XX[1:]

In [None]:
#at which postion is the stowage plan of the agent. (maximal Performance 100%)
# At which position is the stowage plan of the agent (maximal performance is
# 100%)? The length of the ranked list itself is used as denominator; the
# name used before is never defined in this notebook.
n_ranked = len(stowage_Plans_XX)
for ix,i in enumerate(stowage_Plans_XX):
    if i == evaluation:
        print(str(ix+1)+". Position of "+str(n_ranked)+ \
              "\t Performance of model: "+str((ix+1)/n_ranked))
        break
else:
    # The evaluation is not in the ranked list, i.e. it was the lowest entry
    # that got trimmed: report it as ranking below every reference plan.
    print("0. Position of "+str(n_ranked)+"\t Performance of model: 0.0")

In [None]:
# Load the stored SARSA model and print its parameters.
# The model directory is built explicitly instead of reusing `module_path`:
# an earlier plotting cell rebinds `module_path` to a plots sub-directory
# without a trailing separator, so appending the model file name to it would
# produce a wrong path.
model_dir = os.path.join(os.getcwd(), 'out', '')
agent = sarsa.SARSA(env, model_dir)
# NOTE(review): the file name suggests a 10-lane / 14-row model - confirm it
# matches the environment used in this notebook.
agent.load_model(os.path.join(model_dir, "20200729", "1827", "1827SARSA_L10_R14_Rf1_A5.p"))

# Print every stored model parameter as "<name><tab><tab><value>".
for info in agent.q_table["ModelParam"]:
    print(str(info)+'\t\t'+str(agent.q_table["ModelParam"][info]))

In [None]:
# Variant of the rank density figure with a title and the legend placed
# below the strip plot; saved as PDF into the plots directory.
# NOTE(review): this rebinds the notebook-wide `module_path` to a plots
# sub-directory without a trailing separator.
module_path = str(os.getcwd())+'\\out\\plots'
os.makedirs(module_path, exist_ok=True)


sns.set(style="whitegrid")
#sns.set(font_scale=1, rc={'text.usetex' : True})
plt.rcParams["font.family"] = "Times New Roman"
plt.rcParams.update({'font.size': 11})
plt.rcParams.update({'text.color' : "black",
                     'axes.labelcolor' : "black"})
# NOTE(review): called before the figure below exists - confirm it is needed.
plt.tight_layout()


#fig, axs = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(2.6, 4))
# Kernel bandwidth; also used as the x-axis margin further down.
bw = 50

fig = plt.figure(figsize=(5.1, 2.7))
#fig = plt.figure(figsize=(7.9, 3.3))

fig.tight_layout()
# 4x3 grid: density plot in the upper three rows, strip plot in the last row.
gs = fig.add_gridspec(4, 3)

fi_ax1 = fig.add_subplot(gs[0:3, :])
fi_ax2 = fig.add_subplot(gs[3, :],sharex=fi_ax1)


# Tick labels only on the lower axes.
plt.setp(fi_ax1.get_xticklabels(), visible=False)
plt.setp(fi_ax2.get_xticklabels(), visible=True)


# NOTE(review): the `bw` and `kernel` keyword arguments are presumably only
# accepted by older seaborn releases - verify against the installed version.
ax = sns.kdeplot(np.array(agent_ranks), bw=bw, clip=[0,len(dummy)],kernel='epa', ax=fi_ax1)
ax = sns.kdeplot(np.array(random_ranks),ax=ax , bw=bw, clip=[0,len(dummy)],kernel='epa')


ax = sns.stripplot(np.array(agent_ranks),alpha=0.5, ax = fi_ax2,label='Agent')
ax = sns.stripplot(np.array(random_ranks),alpha=0.35, ax = ax, color=sns.color_palette("deep")[1],label='Random')
ax.set_xlim(-bw, len(dummy)+bw)

# Move the lower axes up by 10% of its height and shrink it to 90% to leave
# room for the legend underneath.
box = ax.get_position()
ax.set_position([box.x0, box.y0 + box.height * 0.1,
                 box.width, box.height * 0.9])

# Put a legend below current axis
leg = ax.legend(loc='upper left', bbox_to_anchor=(-0.013, -0.6),
          fancybox=True, shadow=False, ncol=5, fontsize=11*(1/0.9))
# Legend markers are drawn fully opaque although the points are translucent.
# NOTE(review): newer Matplotlib releases presumably expose this attribute as
# `legend_handles` - verify against the installed version.
for lh in leg.legendHandles: 
    lh.set_alpha(1)

    
fi_ax1.set(ylabel='Density')
fi_ax2.set(xlabel='Ranks')
fi_ax2.xaxis.set_label_coords(0.96, -0.72)
plt.suptitle('Rank Comparison of stowage plans')

plt.savefig(module_path + '\\denisity_a.pdf', dpi=600, bbox_inches="tight")

In [None]:
import seaborn as sns
bw = 50
sns.set_style('whitegrid')
# Strip plot of the agent ranks and the random ranks on one shared axes.
# The axes is created explicitly and handed to both calls: previously the
# x-limits were set on an `ax` left over from an earlier cell, not on the
# axes drawn here. The data is passed as `x=` so the ranks are always laid
# out along the x-axis.
fig, ax = plt.subplots()
sns.stripplot(x=np.array(agent_ranks), alpha=0.5, dodge=True, ax=ax)
sns.stripplot(x=np.array(random_ranks), color="orange", alpha=0.5, dodge=True, ax=ax)
ax.set_title('Density Estimation of Ranks')
ax.set_xlim(-bw, len(dummy)+bw)
plt.show()

In [None]:
import seaborn as sns
bw = 50
sns.set_style('whitegrid')
# Swarm plot of the agent ranks and the random ranks on one shared axes.
# The axes is created explicitly and handed to both calls: previously the
# x-limits were set on an `ax` left over from an earlier cell, not on the
# axes drawn here. The data is passed as `x=` so the ranks are always laid
# out along the x-axis.
fig, ax = plt.subplots()
sns.swarmplot(x=np.array(agent_ranks), alpha=0.5, ax=ax)
sns.swarmplot(x=np.array(random_ranks), color="orange", alpha=0.5, ax=ax)
ax.set_title('Density Estimation of Ranks')
ax.set_xlim(-bw, len(dummy)+bw)
plt.show()

In [None]:
# Rank density figure with a swarm plot in the lower panel: kernel density
# estimate in the upper two grid rows, individual ranks in the third row.
#fig, axs = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(2.6, 4))
# Kernel bandwidth; also used as the x-axis margin further down.
bw = 20

fig = plt.figure(figsize=(8, 4))
fig.tight_layout()
gs = fig.add_gridspec(3, 3)

fi_ax1 = fig.add_subplot(gs[0:2, :])
fi_ax2 = fig.add_subplot(gs[2, :],sharex=fi_ax1)


# Tick labels only on the lower axes.
plt.setp(fi_ax1.get_xticklabels(), visible=False)
plt.setp(fi_ax2.get_xticklabels(), visible=True)


# NOTE(review): the `bw` and `kernel` keyword arguments are presumably only
# accepted by older seaborn releases - verify against the installed version.
ax = sns.kdeplot(np.array(agent_ranks), bw=bw, clip=[0,len(dummy)],kernel='epa', ax=fi_ax1)
ax = sns.kdeplot(np.array(random_ranks),ax=ax , bw=bw, clip=[0,len(dummy)],kernel='epa')


ax = sns.swarmplot(np.array(agent_ranks),alpha=0.6, ax = fi_ax2,label='Agent')
ax = sns.swarmplot(np.array(random_ranks),alpha=0.6, ax = ax, color="orange",label='Random')
ax.set_xlim(-bw, len(dummy)+bw)

# Move the lower axes up by 10% of its height and shrink it to 90% to leave
# room for the legend underneath.
box = ax.get_position()
ax.set_position([box.x0, box.y0 + box.height * 0.1,
                 box.width, box.height * 0.9])

# Put a legend below current axis
leg = ax.legend(loc='upper left', bbox_to_anchor=(-0.013, -0.4),
          fancybox=True, shadow=False, ncol=5)
# Legend markers are drawn fully opaque although the points are translucent.
# NOTE(review): newer Matplotlib releases presumably expose this attribute as
# `legend_handles` - verify against the installed version.
for lh in leg.legendHandles: 
    lh.set_alpha(1)

In [None]:
# Show the directory `module_path` currently points to.
print(module_path)