In [56]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from IPython.display import display

from sklearn.metrics import confusion_matrix, classification_report

In [47]:
ROOT = Path("../../../grabo/sti1/").absolute()
DATA_DIR = ROOT / "data/grabo_w2v"
OUT_DIR = ROOT / "exp/debug"

LABELS = [
    'action_approach', 'action_grab', 'action_lift', 'action_move_abs', 
    'action_move_rel', 'action_pointer', 'action_turn_abs', 'action_turn_rel', 
    'throttle_fast', 'throttle_slow', 'distance_alot', 'distance_little', 
    'distance_normal', 'direction_backward', 'direction_forward', 
    'angle_east', 'angle_north', 'angle_south', 'angle_west', 
    'posx_centerx', 'posx_left', 'posx_right', 
    'posy_centery', 'posy_down', 'posy_up', 
    'position_down', 'position_up', 'state_off', 'state_on', 
    'grabber_close', 'grabber_open'
]

In [7]:
def softmax(array):
    return np.exp(array) / np.exp(array.sum())

In [51]:
test_set = (
    pd.read_csv(DATA_DIR / "test.csv", index_col="uttid")
    .rename(columns={k: k.replace('_', '') for k in ('pos_x', 'pos_y')})
    .fillna("")
)

columns = columns = pd.MultiIndex.from_tuples(map(lambda s: s.split("_", maxsplit=1), LABELS))
predictions = pd.DataFrame(np.load(OUT_DIR / "test_results_sti.npy"), index=test_set.index, columns=columns)

In [136]:
predictions.xs("action", level=0, axis=1).columns

Index(['approach', 'grab', 'lift', 'move_abs', 'move_rel', 'pointer',
       'turn_abs', 'turn_rel'],
      dtype='object')

In [138]:
action_predicted = (
    predictions.xs("action", level=0, axis=1)
    .apply(lambda row: row.idxmax(), axis=1)
)

action_scores = pd.DataFrame(
    confusion_matrix(test_set["action"], action_predicted),
    index=predictions.xs("action", level=0, axis=1).columns,
    columns=predictions.xs("action", level=0, axis=1).columns
)

action_scores

Unnamed: 0,approach,grab,lift,move_abs,move_rel,pointer,turn_abs,turn_rel
approach,34,0,0,0,0,0,0,0
grab,0,21,0,0,0,0,0,0
lift,0,0,34,0,0,0,0,0
move_abs,0,0,0,63,0,0,0,0
move_rel,0,0,0,0,163,0,0,0
pointer,0,0,0,0,0,4,0,0
turn_abs,0,0,0,0,0,0,54,0
turn_rel,0,0,0,0,0,0,0,97


In [82]:
def hard_prediction(row):
    row = row > 0.5
    if row.sum() > 1:
#         raise ValueError("_".join(idx for idx, c in row.iteritems() if c))
        return "_".join(idx for idx, c in row.iteritems() if c)
    elif row.sum() == 1:
        return row.index[row][0]
    else:
        return ""

predictions.xs('angle', axis=1, level=0).apply(hard_prediction, axis=1)

uttid
pp4_recording23_Voice_13    east
pp4_recording25_Voice_1     west
pp3_recording4_Voice_10         
pp7_recording6_Voice_1          
pp6_recording10_Voice_12        
                            ... 
pp11_recording21_Voice_6    west
pp4_recording10_Voice_4         
pp8_recording1_Voice_6          
pp2_recording11_Voice           
pp4_recording30_Voice_13        
Length: 470, dtype: object

In [150]:
actual = pd.DataFrame(
    np.zeros_like(predictions, dtype=np.int64), 
    index=test_set.index, 
    columns=list(map("_".join, predictions.columns))
)
actual.update(pd.get_dummies(test_set.applymap(lambda s: s or None)))
actual.columns = predictions.columns

all_scores = (
    ((predictions > .5).astype(int) == actual).mean(0)
    .rename('accuracy')
    .to_frame()
    .applymap('{:.3%}'.format)
)

all_scores.iloc[15:]

Unnamed: 0,Unnamed: 1,accuracy
angle,east,97.660%
angle,north,100.000%
angle,south,99.787%
angle,west,98.723%
posx,centerx,100.000%
posx,left,99.787%
posx,right,99.574%
posy,centery,100.000%
posy,down,99.574%
posy,up,99.787%


In [146]:
actual = pd.DataFrame(
    np.zeros_like(predictions.iloc[:, 8:], dtype=np.int64), 
    index=test_set.index, 
    columns=list(map("_".join, predictions.columns[8:]))
)
actual.update(pd.get_dummies(test_set.iloc[:, 1:].applymap(lambda s: s or None)))
actual.columns = predictions.columns[8:]

instruction_scores = (
    ((predictions.iloc[:, 8:] > .5).astype(int) == actual).mean(0)
    .rename('accuracy')
    .to_frame()
    .applymap('{:.3%}'.format)
)

instruction_scores

Unnamed: 0,Unnamed: 1,accuracy
throttle,fast,100.000%
throttle,slow,100.000%
distance,alot,100.000%
distance,little,100.000%
distance,normal,100.000%
direction,backward,99.149%
direction,forward,98.936%
angle,east,97.660%
angle,north,100.000%
angle,south,99.787%


In [85]:
for instruction in predictions.columns.levels[0][1:]:
    pred = predictions.xs(instruction, axis=1, level=0)
    index = ["", *test_set[instruction].unique()]
    print(instruction)
    display(pd.DataFrame(
        confusion_matrix(test_set[instruction], pred.apply(hard_prediction, axis=1)),
        index=index, columns=index
    ))

angle


Unnamed: 0,Unnamed: 1,east,west,Unnamed: 4,north,south
,319,0,0,0,0,0
east,7,48,0,0,0,1
west,0,0,0,0,0,0
,0,0,0,11,0,0
north,1,0,0,0,42,0
south,4,1,2,0,0,34


direction


Unnamed: 0,Unnamed: 1,Unnamed: 2,forward,backward
,307,0,0,0
,0,75,1,1
forward,0,0,0,0
backward,0,3,0,83


distance


ValueError: Shape of passed values is (4, 4), indices imply (5, 5)

In [54]:
predictions.iloc[:, 8:]

Unnamed: 0_level_0,east,north,south,west
uttid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
pp4_recording23_Voice_13,6.138597e-01,6.411035e-06,1.209688e-04,8.081779e-02
pp4_recording25_Voice_1,7.194681e-03,9.387861e-02,2.892129e-02,7.558887e-01
pp3_recording4_Voice_10,7.799731e-10,2.563330e-07,1.787946e-09,1.152223e-08
pp7_recording6_Voice_1,2.821580e-10,1.833418e-07,2.465217e-07,2.014582e-07
pp6_recording10_Voice_12,4.031817e-06,5.937622e-06,4.276057e-08,1.068261e-07
...,...,...,...,...
pp11_recording21_Voice_6,3.956302e-02,1.219189e-04,2.819326e-03,9.844220e-01
pp4_recording10_Voice_4,4.183524e-07,1.828067e-06,2.105035e-06,9.201279e-07
pp8_recording1_Voice_6,2.086784e-08,1.795965e-06,3.903788e-05,2.078645e-07
pp2_recording11_Voice,2.917113e-08,1.744595e-08,1.611326e-09,4.530900e-06
