## Import testing dataset

In [None]:
## import required packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
from statistics import *
import pickle
import statistics
from scipy.stats.stats import pearsonr
from sklearn.metrics import auc
from scipy import integrate

In [None]:
TEST_DATA_PATH = ""  ## use your test dataset
df_test = pd.read_csv(TEST_DATA_PATH)  ## raw test readings, loaded as-is

## Aggregate dataset

In [None]:
## Sort the loaded test readings first by bookingID, then by second within each booking.
## The raw frame is loaded under the name `df_test`, so the pipeline's `df` has to start from it.
df = df_test.sort_values(by=['bookingID','second'],ascending=True)

In [None]:
## Collapse the readings to one row per bookingID: every remaining column
## becomes a list holding that booking's values in their current row order.
readings_by_booking = df.groupby(['bookingID'])
df = readings_by_booking.agg(lambda column_values: list(column_values))

In [None]:
## Load the test labels and reduce them to one row per bookingID, indexed by bookingID.
## Sorting by (bookingID, label) and keeping the last duplicate retains the
## largest label value whenever a bookingID appears more than once.
label = (
    pd.read_csv('')  ## import your test label
    .sort_values(by=['bookingID', 'label'], ascending=True)
    .drop_duplicates(subset=['bookingID'], keep='last')
    .set_index('bookingID', drop=True)
)

In [None]:
## Join features and labels column-wise on their index,
## then discard every row that has a missing value on either side.
df_new = pd.concat([df, label], axis=1).dropna(axis=0)

In [None]:
## Move the index of the joined frame back into a regular column and replace it
## with a default 0..n-1 index; the cells below address rows by position.
df = df_new.reset_index()

## Feature extraction

- Add resultant acceleration (column `result_acceleration`): the magnitude of the x/y/z acceleration vector

In [None]:
## Resultant acceleration per row: element-wise sqrt(x^2 + y^2 + z^2) over the
## row's three acceleration sequences.
result_acc = [
    np.sqrt(np.square(x_vals) + np.square(y_vals) + np.square(z_vals))
    for x_vals, y_vals, z_vals in zip(
        df['acceleration_x'], df['acceleration_y'], df['acceleration_z']
    )
]

df['result_acceleration'] = result_acc

- Add resultant gyro (column `result_gyro`): the magnitude of the x/y/z gyro vector

In [None]:
## Resultant gyro per row: element-wise sqrt(x^2 + y^2 + z^2) over the
## row's three gyro sequences.
result_gyro = [
    np.sqrt(np.square(x_vals) + np.square(y_vals) + np.square(z_vals))
    for x_vals, y_vals, z_vals in zip(df['gyro_x'], df['gyro_y'], df['gyro_z'])
]

df['result_gyro'] = result_gyro

- Add rotation angle for x-axis and y-axis

In [None]:
## Rotation angle about the x-axis per row: arctan(y / sqrt(x^2 + z^2)),
## evaluated element-wise over the row's acceleration sequences.
rotate_x = [
    np.arctan(y_vals / np.sqrt(np.square(x_vals) + np.square(z_vals)))
    for x_vals, y_vals, z_vals in zip(
        df['acceleration_x'], df['acceleration_y'], df['acceleration_z']
    )
]

df['rotate_x'] = rotate_x

In [None]:
def flip_sign(list_):
    """Return a new list containing the negation of every element of ``list_``."""
    negated = []
    for value in list_:
        negated.append(-value)
    return negated

## Rotation angle about the y-axis per row: arctan(-x / z), element-wise over
## the row's acceleration_x and acceleration_z sequences.
rotate_y = [
    np.arctan(np.array(flip_sign(x_vals)) / np.array(z_vals))
    for x_vals, z_vals in zip(df['acceleration_x'], df['acceleration_z'])
]

df['rotate_y'] = rotate_y

- Extract features from raw dataset

In [None]:
class feature_global():
    """Summary statistics over one row of a dataframe whose cells hold sequences.

    Every method takes ``index``, a row *position* (not a label), and most take
    ``field``, a column label. The addressed cell is expected to hold an
    indexable sequence of readings (it is indexed, iterated and passed to
    ``len``). Methods that integrate also read the row's ``second`` column as
    the x-axis.
    """

    def __init__ (self, df):
        self.df = df

    def _cell(self, field, index):
        """Return the value stored in column ``field`` at row position ``index``."""
        # Select the column first: this avoids building the whole row as an
        # object Series on every single lookup.
        return self.df[field].iloc[index]

    @staticmethod
    def _longest_run_fraction(values, is_step):
        """Return (longest run of consecutive steps + 1) / len(values).

        ``is_step(previous, current)`` decides whether a neighbouring pair
        extends the current run; any other pair resets the run to zero.
        Raises ZeroDivisionError when ``values`` is empty.
        """
        longest = 0
        current = 0
        for i in range(len(values) - 1):
            if is_step(values[i], values[i + 1]):
                current += 1
                if current > longest:
                    longest = current
            else:
                current = 0
        # A run of k steps spans k + 1 samples, hence the + 1.
        return (longest + 1) / len(values)

    def sum_(self,field,index):
        """Return the sum of the absolute values of the readings."""
        return sum(np.abs(self._cell(field, index)))

    def mean(self,field,index):
        """Return the arithmetic mean of the readings."""
        return np.mean(self._cell(field, index))

    def max_(self,field,index):
        """Return the largest reading."""
        return max(self._cell(field, index))

    def iqr(self,field,index):   ## Interquantile range
        """Return the 75th percentile minus the 25th percentile of the readings."""
        upper, lower = np.percentile(self._cell(field, index), [75, 25])
        return upper - lower

    def integrate(self,field,index):
        """Return the area under |readings| plotted against the row's ``second`` values.

        Delegates to ``auc`` with ``second`` as x and the absolute readings as y.
        """
        return auc(self._cell('second', index), np.abs(self._cell(field, index)))

    def max_consecutive_increase(self,field,index): ##maximum interval of consecutive increase
        """Return the longest strictly increasing stretch as a fraction of the sequence length."""
        return self._longest_run_fraction(
            self._cell(field, index), lambda previous, current: current > previous
        )

    def max_consecutive_decrease(self,field,index):##maximum interval of consecutive decrease
        """Return the longest strictly decreasing stretch as a fraction of the sequence length."""
        return self._longest_run_fraction(
            self._cell(field, index), lambda previous, current: current < previous
        )

    def change(self,field,index):  ## change in time series (return the mean and max)
        """Return ``[mean, max]`` of the differences between consecutive readings.

        Raises ``statistics.StatisticsError`` when there are fewer than two
        readings, because there is then no difference to average.
        """
        readings = self._cell(field, index)
        steps = [readings[i] - readings[i - 1] for i in range(1, len(readings))]
        return [statistics.mean(steps), max(steps)]

    def avg_speed(self,index): ## average speed over the total trip
        """Return the integral of |Speed| divided by the row's last ``second`` value."""
        distance = self.integrate('Speed', index)
        return distance / self._cell('second', index)[-1]

    def avg_gyro(self,index): ## average angle rotated over the total trip
        """Return the integral of |result_gyro| divided by the row's last ``second`` value."""
        rad_dist = self.integrate('result_gyro', index)
        return rad_dist / self._cell('second', index)[-1]

    def bearing(self,index):  ## change in bearing
        """Return ``[mean step, max step, sum of steps / distance]`` for ``Bearing``.

        A step is the difference between consecutive bearing readings, adjusted
        when the pair straddles the 0/360 boundary. ``distance`` is the integral
        of |Speed| for the same row.
        """
        headings = self._cell('Bearing', index)
        distance = self.integrate('Speed', index)
        diff_bear = []

        for i in range(1, len(headings)):
            current, previous = headings[i], headings[i - 1]
            if current < 90 and previous > 270:
                ## bearing has range (0,360): a change from 359.9 to 1 is not 358.9 but 360-359.9+1
                diff_bear.append(current + 360 - previous)
            elif current > 270 and previous < 90:
                # NOTE(review): this branch records a positive step for a wrap
                # from below 90 to above 270 (1 -> 359 gives +2), while the plain
                # branch below is signed (10 -> 5 gives -5). Left unchanged so the
                # feature values stay the same; confirm whether -2 was intended.
                diff_bear.append(previous + 360 - current)
            else:
                diff_bear.append(current - previous)

        ##return mean, max, change in bearing per distance travelled
        return [statistics.mean(diff_bear), max(diff_bear), sum(diff_bear)/distance]


In [None]:
## Accumulators for the resultant-acceleration features; each name gets its own empty list.
(acc_mean, acc_max, acc_iqr, acc_increase,
 acc_decrease, acc_mean_diff, acc_max_diff) = ([] for _ in range(7))

In [None]:
## Accumulators for the rotation features; each name gets its own empty list.
rotate_x_max, rotate_x_max_diff, rotate_x_dist = ([] for _ in range(3))

rotate_y_max, rotate_y_max_diff, rotate_y_dist = ([] for _ in range(3))

rotate_z_dist = []

In [None]:
## Accumulators for the resultant-gyro features; each name gets its own empty list.
(gyro_mean, gyro_max, gyro_iqr, gyro_increase,
 gyro_decrease, rad_dist, avg_gyro) = ([] for _ in range(7))

In [None]:
## Accumulators for the speed features; each name gets its own empty list.
(speed_mean, speed_max, speed_iqr, speed_increase,
 speed_decrease, distance, avg_speed) = ([] for _ in range(7))

In [None]:
## Accumulators for the bearing features; each name gets its own empty list.
(bear_increase, bear_decrease, bear_mean_diff,
 bear_max_diff, bear_change_per_dist) = ([] for _ in range(5))

In [None]:
trip_len = []  ## filled by the extraction loop with the last `second` value of each row

features = feature_global(df)  ## feature calculator bound to the aggregated frame

In [None]:
## Fill the feature lists with one value per row of df.
## `change` and `bearing` each return several statistics at once, so they are
## called a single time per row and unpacked rather than recomputed per list.
n_trips = len(df.index.values)
for i in range(n_trips):

    acc_change = features.change('result_acceleration',i)  ## [mean diff, max diff]
    acc_mean.append(features.mean('result_acceleration',i))
    acc_max.append(features.max_('result_acceleration',i))
    acc_iqr.append(features.iqr('result_acceleration',i))
    acc_increase.append(features.max_consecutive_increase('result_acceleration',i))
    acc_decrease.append(features.max_consecutive_decrease('result_acceleration',i))
    acc_mean_diff.append(acc_change[0])
    acc_max_diff.append(acc_change[1])

    gyro_mean.append(features.mean('result_gyro',i))
    gyro_max.append(features.max_('result_gyro',i))
    gyro_iqr.append(features.iqr('result_gyro',i))
    gyro_increase.append(features.max_consecutive_increase('result_gyro',i))
    gyro_decrease.append(features.max_consecutive_decrease('result_gyro',i))
    avg_gyro.append(features.avg_gyro(i))
    rad_dist.append(features.integrate('result_gyro',i))

    bear_stats = features.bearing(i)  ## [mean diff, max diff, change per distance]
    bear_increase.append(features.max_consecutive_increase('Bearing',i))
    bear_decrease.append(features.max_consecutive_decrease('Bearing',i))
    bear_mean_diff.append(bear_stats[0])
    bear_max_diff.append(bear_stats[1])
    bear_change_per_dist.append(bear_stats[2])

    speed_mean.append(features.mean('Speed',i))
    speed_max.append(features.max_('Speed',i))
    speed_iqr.append(features.iqr('Speed',i))
    speed_increase.append(features.max_consecutive_increase('Speed',i))
    speed_decrease.append(features.max_consecutive_decrease('Speed',i))
    distance.append(features.integrate('Speed',i))
    avg_speed.append(features.avg_speed(i))

    rotate_x_max.append(features.max_('rotate_x',i))
    rotate_x_max_diff.append(features.change('rotate_x',i)[1])
    rotate_x_dist.append(features.integrate('gyro_x',i))

    rotate_y_max.append(features.max_('rotate_y',i))
    rotate_y_max_diff.append(features.change('rotate_y',i)[1])
    rotate_y_dist.append(features.integrate('gyro_y',i))

    rotate_z_dist.append(features.integrate('gyro_z',i))

    ## trip length = last recorded `second` of the row
    trip_len.append(df['second'].iloc[i][-1])

    if i % 5000 ==0:
        print(i,' out of ',n_trips,' is completed')

In [None]:
## Display how many values were collected in rotate_x_dist
## (the extraction loop appends one per row of df each time it runs).
len(rotate_x_dist)

## Combine features into a single dataframe

In [None]:
## Stack the identifier, the label and every feature list side by side,
## one column per list, in the order given here.
feature_columns = (
    list(df.bookingID), list(df.label), acc_mean,
    acc_max, acc_iqr, acc_increase, acc_decrease, acc_mean_diff,
    acc_max_diff, rotate_x_max, rotate_x_max_diff, rotate_x_dist, rotate_y_max,
    rotate_y_max_diff, rotate_y_dist, rotate_z_dist, gyro_mean, gyro_max, gyro_iqr,
    gyro_increase, gyro_decrease, rad_dist, avg_gyro, speed_mean, speed_max,
    speed_iqr, speed_increase, speed_decrease, distance, avg_speed,
    bear_increase, bear_decrease, bear_mean_diff, bear_max_diff,
    bear_change_per_dist, trip_len,
)
## Indexing np.c_ with a tuple is the same call as listing the items inside the brackets.
df_feature = np.c_[feature_columns]

In [None]:
## Wrap the stacked array in a DataFrame, naming the columns in the same
## order in which the lists were stacked.
feature_names = [
    'bookingID', 'label', 'acc_mean',
    'acc_max', 'acc_iqr', 'acc_increase', 'acc_decrease', 'acc_mean_diff',
    'acc_max_diff', 'rotate_x_max', 'rotate_x_max_diff', 'rotate_x_dist', 'rotate_y_max',
    'rotate_y_max_diff', 'rotate_y_dist', 'rotate_z_dist', 'gyro_mean', 'gyro_max', 'gyro_iqr',
    'gyro_increase', 'gyro_decrease', 'rad_dist', 'avg_gyro', 'speed_mean', 'speed_max',
    'speed_iqr', 'speed_increase', 'speed_decrease', 'distance', 'avg_speed',
    'bear_increase', 'bear_decrease', 'bear_mean_diff', 'bear_max_diff',
    'bear_change_per_dist', 'trip_len',
]
df_feature = pd.DataFrame(df_feature, columns=feature_names)

In [None]:
## Summary statistics for the first ten columns of the feature table.
df_feature.describe().iloc[:,:10]

In [None]:
## Column dtypes and non-null counts of the feature table.
df_feature.info()

## Model testing

In [None]:
# load the model from disk, the filename is the path of your saved model
# NOTE: unpickling can execute arbitrary code -- only load model files you trust.
filename = ""  ## use the path of your saved model
with open(filename, 'rb') as model_file:  # the handle is closed as soon as the model is read
    loaded_model = pickle.load(model_file)
# then use df_feature for model testing
# (`columns=` already names the axis, so no separate `axis` argument is needed)
df_feature = df_feature.drop(columns=['bookingID'])