In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
from __future__ import division, print_function
from __future__ import absolute_import, unicode_literals
from IPython.display import display
import pandas as pd
import numpy as np
from functools import reduce
import matplotlib.pyplot as plt
import seaborn as sns
from tabulate import tabulate

import bokeh.io
import bokeh.mpl
import bokeh.plotting

%config InlineBackend.figure_formats = {'eps', 'retina'}
bokeh.io.output_notebook()

rc = {'lines.linewidth': 1, 
      'axes.labelsize': 14, 
      'axes.titlesize': 14, 
      'axes.facecolor': 'FFFFFF',
       }

sns.set_context('notebook', rc=rc)
sns.set_style('white', rc=rc)
sns.set_palette("colorblind")

pd.set_option('precision', 4)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [11]:
def is_bad(x):
    if x in [-0.5, -1.0, 9.5]:
        return True
    else:
        return False

def load_transform(fp):
    df = pd.read_json(fp)
    df = df.sort_values(by=['trip', 't'])
    bins = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
    df['trip_interval'] = pd.cut(df['trip'], 
                                    bins=bins,
                                    precision=3,
                                    labels = trips_intervals)
    df['success'] = df['success'].astype(int)
    df['reward'] = df['reward'].astype(int)
    df['bad_moves'] = df['reward'].apply(lambda x: int(is_bad(x)))
    return df


def plot_path(df, deadline=False):
    df = df.sort_values(by=['trip', 't'])
    grid = sns.FacetGrid(df, col="trip", 
                         hue="trip",
                         col_wrap=5,
                         size=2.0,
                         aspect=1.5)
    grid.map(plt.axhline, y=0, ls=":", c=".5", label='No move')
    grid.map(plt.plot, "t", "reward", marker="o", ms=5, lw=.8)

    if deadline:
        grid.set(xticks=np.linspace(0, 50, 6, endpoint=True), xlim=(-1, 50), ylim=(-3, 13))
    else:
        grid.set(xticks=np.linspace(0, 150, 6, endpoint=True), xlim=(-1, 150), ylim=(-3, 13))
    grid.fig.tight_layout(w_pad=1)
    plt.savefig("./figures/q_learn_agent.eps", format='eps', dpi=1000)

def review_results(df):
    print('Successful trips:')
    display(pd.crosstab(df.trip_interval, df.success, margins=True).T)
    
    print("\nRewards:")
    display(pd.crosstab(df.trip_interval, df.reward, margins=True).T)

    print("\nBad moves:")
    display(pd.crosstab(df.trip_interval, df.bad_moves, margins=True).T)
    
def publish_results(df, n=1):
    print(tabulate(df.groupby(['trip_interval'])['success', 'bad_moves'].sum()/n,
         headers=['Trips', 'Success', 'Bad moves'],
         tablefmt='latex_booktabs'))
    
trips_intervals = ['01 - 10', '11 - 20', '21 - 30', '31 - 40', '41 - 50', '51 - 60', '61 - 70', '71 - 80', '81 - 90', '91 - 100']

In [3]:
# File needed for the notebook
fp = "./results/q_agent.json"
df = load_transform(fp)

In [4]:
# Random Action No Deadline
review_results(df)

Successful trips:


trip_interval,01 - 10,11 - 20,21 - 30,31 - 40,41 - 50,51 - 60,61 - 70,71 - 80,81 - 90,91 - 100,All
success,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,217,169,153,136,175,193,201,205,158,144,1751
1,4,5,5,6,6,5,2,2,7,5,47
All,221,174,158,142,181,198,203,207,165,149,1798



Rewards:


trip_interval,01 - 10,11 - 20,21 - 30,31 - 40,41 - 50,51 - 60,61 - 70,71 - 80,81 - 90,91 - 100,All
reward,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
-1,7,0,3,1,1,2,3,3,1,0,21
0,80,62,49,67,53,70,29,45,53,45,553
2,130,107,101,68,121,121,169,157,104,99,1177
12,4,5,5,6,6,5,2,2,7,5,47
All,221,174,158,142,181,198,203,207,165,149,1798



Bad moves:


trip_interval,01 - 10,11 - 20,21 - 30,31 - 40,41 - 50,51 - 60,61 - 70,71 - 80,81 - 90,91 - 100,All
bad_moves,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,214,174,155,141,180,196,200,204,164,149,1777
1,7,0,3,1,1,2,3,3,1,0,21
All,221,174,158,142,181,198,203,207,165,149,1798


In [5]:
publish_results(df)

\begin{tabular}{lrr}
\toprule
 Trips    &   Success &   Bad moves \\
\midrule
 01 - 10  &         4 &           7 \\
 11 - 20  &         5 &           0 \\
 21 - 30  &         5 &           3 \\
 31 - 40  &         6 &           1 \\
 41 - 50  &         6 &           1 \\
 51 - 60  &         5 &           2 \\
 61 - 70  &         2 &           3 \\
 71 - 80  &         2 &           3 \\
 81 - 90  &         7 &           1 \\
 91 - 100 &         5 &           0 \\
\bottomrule
\end{tabular}


In [9]:
first_20 = df[df['trip'].apply(lambda x: 0 < x <= 20)]
plot_path(first_20, deadline=True)

<matplotlib.figure.Figure at 0x1092d28d0>

In [7]:
# local_min = df[df['trip'].apply(lambda x: 31 <= x <= 60)]
# plot_path(local_min, deadline=True)

<matplotlib.figure.Figure at 0x108a2c278>

In [13]:
df = load_transform("./results/q_agent_sim_1000.json")
publish_results(df, n=1000)

\begin{tabular}{lrr}
\toprule
 Trips    &   Success &   Bad moves \\
\midrule
 01 - 10  &     4.178 &       5.47  \\
 11 - 20  &     4.465 &       1.5   \\
 21 - 30  &     4.452 &       1.384 \\
 31 - 40  &     4.51  &       1.244 \\
 41 - 50  &     4.319 &       1.143 \\
 51 - 60  &     4.31  &       1.159 \\
 61 - 70  &     4.251 &       1.042 \\
 71 - 80  &     4.266 &       1.005 \\
 81 - 90  &     4.193 &       0.901 \\
 91 - 100 &     4.092 &       0.867 \\
\bottomrule
\end{tabular}
