## Evaluate trained seq2seq RNN model for headway (HDW) state prediction

In [1]:
import pickle
import numpy as np
from sklearn import metrics

In [2]:
# Load the test-set targets that the later cells tokenize as the "actual" values.
# The context manager closes the file even if unpickling raises.
f_name = '../data/yh_test.pkl'
with open(f_name,'rb') as f_obj:
    yh_test = pickle.load(f_obj)

In [3]:
# Number of sequences times 171, the count of 3-element groups tokenized per sequence below
# (presumably one group per platform -- TODO confirm).
len(yh_test)*171

2223684

In [4]:
def tokenz(loc_vec):
    """Convert a three-element platform HDW state vector (+15 min or +30 min) to a token.

    Token definition: 0 = [0,0,0] = nan, 1 = [1,0,0], 2 = [0,1,0], 3 = [0,0,1]

    Parameters
    ----------
    loc_vec : sequence of three numbers (probabilities or a one-hot vector)

    Returns
    -------
    0 when the largest entry is below 0.33, otherwise the 1-based position
    of the largest entry.
    """
    peak = max(loc_vec)
    # No entry is large enough to count as a state -> "nan" token.
    return 0 if peak < 0.33 else np.argmax(loc_vec) + 1

In [5]:
# Tokenize the actual targets: element [0] of each sequence is used for the
# 15-minute tokens and element [1] for the 30-minute tokens. Each is read as
# 171 consecutive 3-element groups, one token per group.
# NOTE(review): the last sequence of yh_test is deliberately left out by the
# range bound -- presumably to match the number of saved predictions; confirm.
act_tok_15 = []
act_tok_30 = []
for seq_idx in range(len(yh_test)-1):
    target_15 = yh_test[seq_idx][0]
    target_30 = yh_test[seq_idx][1]
    act_tok_15.append([tokenz(target_15[3*grp:3*(grp+1)]) for grp in range(171)])
    act_tok_30.append([tokenz(target_30[3*grp:3*(grp+1)]) for grp in range(171)])

In [6]:
# Load the predicted tokens for the 30-minute and 15-minute horizons.
# Context managers close each file even if unpickling raises.
f_name = '../data/pred_tok_30.pkl'
with open(f_name,'rb') as f_obj:
    pred_tok_30 = pickle.load(f_obj)
f_name = '../data/pred_tok_15.pkl'
with open(f_name,'rb') as f_obj:
    pred_tok_15 = pickle.load(f_obj)

In [7]:
# Flatten actual and predicted tokens into 1-D arrays of equal length.
# The sequence count is derived from the element count (171 tokens per
# sequence) rather than len(), so re-running this cell after act_tok_15 has
# already been flattened still yields the same num_seq.
num_seq = np.asarray(act_tok_15).size // 171
act_tok_15 = np.asarray(act_tok_15).reshape(num_seq*171,)
act_tok_30 = np.asarray(act_tok_30).reshape(num_seq*171,)
# Reshaping the predictions to the same explicit length raises if their
# element count does not match the actual tokens.
pred_tok_15 = np.asarray(pred_tok_15).reshape(num_seq*171,)
pred_tok_30 = np.asarray(pred_tok_30).reshape(num_seq*171,)

In [8]:
# 4x4 confusion matrix for the 15-minute horizon:
# row = actual token, column = predicted token.
conf_15 = np.zeros((4,4))
# np.add.at accumulates repeated (row, col) index pairs, so one vectorized
# call replaces a Python-level loop over every token.
np.add.at(conf_15,
          (np.asarray(act_tok_15).astype(int), np.asarray(pred_tok_15).astype(int)),
          1)
# Accuracy = matching tokens (the diagonal) over all tokens.
acc_15 = np.trace(conf_15)/len(act_tok_15)
val, ct = np.unique(act_tok_15, return_counts=True)
act_val_count_15 = dict(zip(val, ct))
# Null accuracy = fraction of actual tokens equal to 1, i.e. the score of
# always predicting token 1. .get() avoids a KeyError if token 1 never occurs.
nulacc_15 = act_val_count_15.get(1, 0)/len(act_tok_15)

In [9]:
# Report the 15-minute confusion matrix (one row per actual token) and the summary scores.
print('Multiclassification confusion matrix for 15 min future prediction of headway')
for row in range(4):
    cells = ['{:>10.0f}'.format(conf_15[row,col]) for col in range(4)]
    # Each cell is followed by a single space, including the last one in the row.
    print(' '.join(cells),end=' ')
    print()
print()
print('Accuracy: ',acc_15)
print('Null accuracy (on token 1): ',nulacc_15)

Multiclassification confusion matrix for 15 min future prediction of headway
     39565       7175         46        127 
      3145    1928249       8508       4212 
      2698      33722      89533       2843 
      2137      16464       3279      81810 

Accuracy:  0.962061836382
Null accuracy (on token 1):  0.874343437614


In [10]:
# 4x4 confusion matrix for the 30-minute horizon:
# row = actual token, column = predicted token.
conf_30 = np.zeros((4,4))
# np.add.at accumulates repeated (row, col) index pairs, so one vectorized
# call replaces a Python-level loop over every token.
np.add.at(conf_30,
          (np.asarray(act_tok_30).astype(int), np.asarray(pred_tok_30).astype(int)),
          1)
# Accuracy = matching tokens (the diagonal) over all tokens.
acc_30 = np.trace(conf_30)/len(act_tok_30)
val, ct = np.unique(act_tok_30, return_counts=True)
act_val_count_30 = dict(zip(val, ct))
# Null accuracy = fraction of actual tokens equal to 1, i.e. the score of
# always predicting token 1. .get() avoids a KeyError if token 1 never occurs.
nulacc_30 = act_val_count_30.get(1, 0)/len(act_tok_30)

In [11]:
# Report the 30-minute confusion matrix (one row per actual token) and the summary scores.
print('Multiclassification confusion matrix for 30 min future prediction of headway')
for row in range(4):
    cells = ['{:>10.0f}'.format(conf_30[row,col]) for col in range(4)]
    # Each cell is followed by a single space, including the last one in the row.
    print(' '.join(cells),end=' ')
    print()
print()
print('Accuracy: ',acc_30)
print('Null accuracy (on token 1): ',nulacc_30)

Multiclassification confusion matrix for 30 min future prediction of headway
     38272       7693         54         52 
      4442    1933454       8059       3822 
      3958      37949      83951       2985 
      2970      17677       3610      74565 

Accuracy:  0.958052415255
Null accuracy (on token 1):  0.876890308264


In [9]:
import pickle
import numpy as np
from sklearn import metrics
# Load the 30-minute predicted tokens from the "lrg" file.
# The context manager closes the file even if unpickling raises.
f_name = '../data/pred_tok_30_lrg.pkl'
with open(f_name,'rb') as f_obj:
    pred_tok_30 = pickle.load(f_obj)

In [10]:
# Convert the loaded predictions to an ndarray and display its shape
# (rows = sequences, columns = tokens per sequence).
pred_tok_30 = np.asarray(pred_tok_30)
pred_tok_30.shape

(65019, 171)

In [11]:
# Inverse token mapping: token -> three-element vector
# (0 is the all-zero vector, 1..3 are the one-hot vectors).
detok = {
    0: [0,0,0],
    1: [1,0,0],
    2: [0,1,0],
    3: [0,0,1],
}

In [13]:
# Expand every predicted token back into its three-element vector via `detok`.
pred_vec_30 = [[detok[tok] for tok in vec] for vec in pred_tok_30]
# Concatenate each sequence's vectors into a single row. The row count comes
# from the data and the width is inferred (-1), instead of the hard-coded
# (65019, 513), so the cell also works for a different number of sequences.
pred_vec_30 = np.asarray(pred_vec_30).reshape(len(pred_vec_30), -1)

In [14]:
# Display the expanded prediction vectors (numpy abbreviates the repr of large arrays).
pred_vec_30

array([[1, 0, 0, ..., 1, 0, 0],
       [1, 0, 0, ..., 1, 0, 0],
       [1, 0, 0, ..., 1, 0, 0],
       ..., 
       [0, 1, 0, ..., 1, 0, 0],
       [0, 1, 0, ..., 1, 0, 0],
       [0, 1, 0, ..., 1, 0, 0]])

In [15]:
# Persist the expanded 30-minute prediction vectors.
# The context manager flushes and closes the file even if pickling raises.
f_name = '../data/pred_vec_30_lrg.pkl'
with open(f_name,'wb') as f_obj:
    pickle.dump(pred_vec_30,f_obj)