## import packages

In [None]:
import os
import pandas as pd
import numpy as np

In [None]:
import keras

from keras.models import load_model
import tensorflow as tf
from sklearn.metrics import roc_auc_score

## prepare notebook

In [None]:
# config project folder
os.chdir('../')

# config notebook display
%matplotlib inline
pd.set_option("display.max_rows", None)
pd.set_option('display.max_columns', None)

## read data

In [None]:
# load the test array saved in NumPy .npy format
test = np.load('data/safety/test_arr.npy')
# show the array's dimensions as a quick sanity check
test.shape

In [None]:
# load the bookingID and subID that identify each test row
# subID:
#     if a trip is longer than 1 hr, it has been split into multiple sub-trips with subID 1, 2, 3, ...
#     if a trip is shorter than 1 hr, its subID is 1
# presumably one row per entry of the test array, in the same order -- confirm

testID = pd.read_csv('data/safety/test_id.csv')
# preview the first three rows
testID.head(3)

## load best model

In [None]:
# custom AUROC metric; needed so load_model can resolve the name 'auroc'
def auroc(y_true, y_pred):
    """Return the ROC AUC of ``y_pred`` against ``y_true`` as a ``tf.double`` tensor.

    scikit-learn's ``roc_auc_score`` is a plain Python function, so it is
    wrapped with ``tf.py_func`` to be usable inside the TensorFlow graph.

    NOTE(review): ``tf.py_func`` is the TensorFlow 1.x API -- confirm the
    installed TensorFlow version still exposes it.
    """
    score_inputs = (y_true, y_pred)
    return tf.py_func(func=roc_auc_score, inp=score_inputs, Tout=tf.double)
# restore the saved model; the custom 'auroc' metric has to be supplied
# through custom_objects so Keras can resolve it by name
model = load_model(
    'model/best_model.08-0.74.h5',
    custom_objects={'auroc': auroc},
)
# print the layer-by-layer architecture summary
model.summary()

## predict on test data

In [None]:
# model input: the test array loaded above (plain alias, no copy is made)
Xnew = test

In [None]:
# run the loaded model on the test input; ynew is used below as the predicted probability
ynew = model.predict(Xnew)

In [None]:
# Collapse the sub-trip predictions into a single probability per booking.
# The final probability of a trip is the maximum over its sub-trips: if the
# driver displayed dangerous behaviour during any time interval, the whole
# trip is labelled as dangerous.
# assumes the rows of ynew are in the same order as the rows of testID -- confirm
test_df = testID.assign(predict_proba=ynew)
test_df_final = test_df.groupby(['bookingID'])['predict_proba'].max()

## write prediction output

In [None]:
# test_df_final is a Series whose index is bookingID, so writing it directly
# with index=False would drop the booking identifiers and leave only the
# probabilities. Turn the index back into a regular column first so every
# output row is (bookingID, predict_proba), with a header row.
test_df_final.reset_index().to_csv('data/safety/prediction_output.csv', index=False)