In [139]:
import pandas as pd
import numpy as np
import random
import json
from numpy import newaxis
from keras.models import Sequential
from keras.layers.core import Dense, Flatten, Dropout
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers import BatchNormalization
from keras.optimizers import SGD
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from keras.wrappers.scikit_learn import KerasClassifier 
from eli5.sklearn import PermutationImportance
import eli5

In [140]:
def match_data_to_label(features, matched):
    """Build a frame indexed like ``matched`` holding each id's features and label.

    Parameters
    ----------
    features : pandas.DataFrame
        Feature table indexed by id. The index is expected to be unique.
    matched : pandas.DataFrame
        Table indexed by id that contains a ``"label"`` column.

    Returns
    -------
    pandas.DataFrame
        One row per row of ``matched`` (same index, same order). Rows whose id
        is present in ``features`` carry that id's feature values and the label
        from ``matched``; every other row stays at 0.0 in all columns.

    Raises
    ------
    KeyError
        If ``matched`` has no ``"label"`` column and at least one id matches.
    """
    # Append the columns of `matched` only when `features` does not already
    # carry a label column. The check must look at the columns, not the index.
    if "label" not in features.columns:
        columns_names = list(features.columns) + list(matched.columns)
    else:
        columns_names = list(features.columns)

    # Index by `matched` itself: the row count is taken from it, so no other
    # object can be relied on to have a compatible index.
    df = pd.DataFrame(
        np.zeros([matched.shape[0], len(columns_names)]),
        columns=columns_names,
        index=matched.index,
    )

    # Columns that can actually be copied from `features`.
    feature_columns = [
        name for name in columns_names
        if name != "label" and name in features.columns
    ]

    # Write through .loc on the frame: rows yielded by iterrows() may be
    # copies, so assigning into them is not guaranteed to reach `df`.
    found = matched.index.isin(features.index)
    if found.any():
        ids = matched.index[found]
        if feature_columns:
            df.loc[found, feature_columns] = features.loc[ids, feature_columns].to_numpy()
        df.loc[found, "label"] = matched.loc[found, "label"].to_numpy()
    return df

#Creating training data
def create_training_data(train, test_size=0.25):
    """Split a labelled feature table into Conv1D-ready train/test arrays.

    Reads the notebook-level globals ``features`` (columns used as inputs) and
    ``random_state`` (seed for the split).

    Parameters
    ----------
    train : pandas.DataFrame
        Table containing the ``features`` columns and a ``"label"`` column.
    test_size : float, optional
        Fraction of rows held out for testing (passed to ``train_test_split``).

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)``. The x arrays have shape
        ``(n_rows, n_features, 1)``; the y arrays are dense one-hot matrices
        with one column per class found in ``train["label"]``.
    """
    inputs = train[features]
    labels = train["label"]

    # Fit the encoder once on *all* labels. Re-fitting it on the test labels
    # (as before) gives the two target matrices different columns whenever a
    # class is missing from one of the splits.
    # The encoder is created with defaults and densified via .toarray(): the
    # `sparse=` keyword was renamed to `sparse_output=` and later removed, so
    # this form works across scikit-learn versions.
    onehot_encoder = OneHotEncoder()
    onehot_encoder.fit(labels.to_numpy().reshape(-1, 1))

    x_train, x_test, y_train, y_test = train_test_split(
        inputs, labels, random_state=random_state, test_size=test_size
    )

    # Add a trailing channel axis for the Conv1D layers.
    x_train = x_train.to_numpy().reshape(x_train.shape[0], x_train.shape[1], 1)
    x_test = x_test.to_numpy().reshape(x_test.shape[0], x_test.shape[1], 1)

    y_train = onehot_encoder.transform(y_train.to_numpy().reshape(-1, 1)).toarray()
    y_test = onehot_encoder.transform(y_test.to_numpy().reshape(-1, 1)).toarray()
    return x_train, x_test, y_train, y_test

def printProgressBar(iteration, total, prefix='', suffix='', decimals=1, length=100, fill='█'):
    """
    Redraw a one-line text progress bar; meant to be called once per loop step.

    @params:
        iteration   - Required  : current iteration (Int)
        total       - Required  : total iterations (Int)
        prefix      - Optional  : text printed before the bar (Str)
        suffix      - Optional  : text printed after the percentage (Str)
        decimals    - Optional  : number of decimals shown in the percentage (Int)
        length      - Optional  : bar width in characters (Int)
        fill        - Optional  : character used for the completed part (Str)
    """
    done_cells = int(length * iteration // total)
    todo_cells = length - done_cells
    percent_text = f"{100 * (iteration / float(total)):.{decimals}f}"
    bar_text = fill * done_cells + '-' * todo_cells
    # Leading and trailing carriage returns keep the bar on a single line.
    print(f'\r{prefix} |{bar_text}| {percent_text}% {suffix}', end='\r')
    # Move to a fresh line once the last step has been drawn.
    if iteration == total:
        print()


In [141]:

# Structure train and test data
# Class display names. Presumably listed in the order of the numeric codes in
# the "label" column (0, 1, 2) -- TODO confirm against the feature extraction.
class_names = ["Pedestrian", "Bicycle", "Car"]
n_classes = len(class_names)
# Seed read by create_training_data() for the train/test split and passed to
# PermutationImportance in a later cell.
random_state = 10

# Load both feature tables, keep only the first row per id and index by id.
data = pd.read_csv("training_data/filtered_features_normal.csv").drop_duplicates(subset=["id"],keep="first").set_index("id")
smote = pd.read_csv("training_data/filtered_features_smote.csv").drop_duplicates(subset=["id"],keep="first").set_index("id")
display(data)
# Every column except the last one is used as a model input; in the frame
# displayed below the last column is "label".
features = data.columns.values[:-1]

# 80/20 train/test splits of the normal and the SMOTE table.
x_train, x_test, y_train, y_test = create_training_data(data, test_size=0.2)
x_train_sm, x_test_sm, y_train_sm, y_test_sm = create_training_data(smote, test_size=0.2)

# Row and feature counts of the "normal" training split; the third axis is
# the single channel added by create_training_data().
n_train, n_feat,_=x_train.shape

Unnamed: 0_level_0,avg_speed,min_speed,max_speed,max_accel,max_deaccel,mean_dist_med_speed,sd_speed,percent_ped_speed,percent_bic_speed,percent_car_speed,num_stops,distance,msd,sinuosity,turosity,convex_hull_area,mean_relative_angle,duration,label
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
6lmxhlsdu2820pp9ewk41z,16.092218,0.232964,25.961550,10.652202,-13.501058,12.864293,5.964283,0.697674,7.209302,86.511628,0.0,7917.451715,3.121126e-08,1.037741,0.004473,0.155636,0.054281,492.005,2.0
g234mtreigui4a64i4xea9,6.322339,0.141193,12.872192,3.198201,-6.585177,6.365499,2.824493,0.735294,26.470588,36.764706,2.0,1132.324604,4.941278e-09,1.007190,0.012726,0.021988,0.142937,179.099,2.0
hwt93drmgmyk3pnt7hib08,1.275309,0.620147,2.196088,1.129826,-7.976933,0.787971,0.274747,67.193676,0.000000,0.000000,0.0,366.273853,1.103109e-10,2.232828,0.009084,0.006280,-0.585815,287.204,0.0
9gmnn1cdfv7vbptl9p83u2,1.210824,0.079203,2.395365,1.275051,-4.657094,1.158081,0.336322,60.103627,0.000000,0.000000,4.0,562.095759,1.374220e-10,1.304693,0.006812,0.013878,0.552962,464.226,0.0
18ejk5cjxixj051qiupxyb,1.313297,0.637032,2.017232,1.256552,-2.369025,0.690100,0.291555,57.831325,0.000000,0.000000,0.0,126.595226,1.912423e-10,1.040797,0.024806,0.003729,0.468846,96.395,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7g52v5scj2to7js6f4jmi4,1.243533,0.661979,2.256642,1.642066,-0.158448,0.797331,0.316182,69.863014,0.000000,0.000000,0.0,286.859469,4.524025e-10,1.207067,0.021065,0.009650,-0.016736,230.681,0.0
jrz127w0s400wyje7zs4sw,1.763742,0.272602,4.458392,1.916048,-1.848782,2.092895,0.562504,20.855615,1.069519,0.000000,0.0,384.712721,1.375724e-10,1.226148,0.026442,0.012710,-0.251740,218.123,0.0
4s9w2jt143n4i04kwa2jk1,1.499608,0.144606,3.267375,1.447523,-0.904054,1.561384,0.658466,30.833333,0.000000,0.000000,1.0,203.246392,1.598375e-10,1.254053,0.021011,0.005565,-0.283793,135.533,0.0
l209b830umsm9x98hcmktf,1.202782,0.014635,4.305775,2.937353,-2.225721,2.145570,0.638703,29.663609,0.917431,0.000000,6.0,437.922017,1.109559e-10,1.553797,0.012153,0.010930,0.219185,364.091,0.0


In [142]:
#DIFFERENT MODEL ARCHITECTURES
def create_advanced_model(optimizer="adam", activation="relu", loss='categorical_crossentropy', shape=None):
    """Build and compile the three-convolution 1D CNN classifier.

    Layer stack: Conv1D(4, relu) -> MaxPooling1D(2) -> Conv1D(2, tanh) ->
    BatchNormalization -> Conv1D(1, sigmoid) -> Dropout(0.5) -> Flatten ->
    Dense(200) -> Dense(100) -> Dense(n_classes, softmax).

    Parameters
    ----------
    optimizer : str or keras optimizer, optional
        Optimizer handed to ``model.compile``.
    activation : str, optional
        Activation of the two hidden Dense layers only; the convolution
        activations are fixed as listed above.
    loss : str, optional
        Loss handed to ``model.compile``.
    shape : int, optional
        Number of input features. Defaults to ``x_train.shape[1]`` (the
        notebook-level training array), which was previously the only option;
        passing it explicitly mirrors ``create_simple_model`` and removes the
        dependency on whatever ``x_train`` currently holds.

    Returns
    -------
    keras.models.Sequential
        The compiled model, reporting accuracy as its metric.
    """
    if shape is None:
        # Backward-compatible fallback to the global training array.
        shape = x_train.shape[1]

    model = Sequential()
    model.add(Conv1D(filters=4,
                     kernel_size=1,
                     strides=1,
                     input_shape=(shape,1),
                     padding="same",
                     activation="relu"))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(filters=2,
                     kernel_size=1,
                     strides=1,
                     padding="same",
                     activation="tanh"))
    model.add(BatchNormalization())
    model.add(Conv1D(filters=1,
                     kernel_size=1,
                     strides=1,
                     padding="same",
                     activation="sigmoid"))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(200, activation=activation))
    model.add(Dense(100, activation=activation))
    model.add(Dense(n_classes, activation="softmax"))
    model.compile(loss=loss,
                  optimizer=optimizer,
                  metrics=['accuracy'])
    return model

def create_simple_model(optimizer="adam", activation="relu", loss='categorical_crossentropy', shape=18):
    """Build and compile the single-convolution 1D CNN classifier.

    Layer stack: Conv1D(1 filter, kernel 1) -> MaxPooling1D(2) -> Flatten ->
    Dense(100) -> Dense(n_classes, softmax). ``activation`` is used for the
    convolution and the hidden Dense layer; ``shape`` is the number of input
    features (each sample is ``(shape, 1)``). ``n_classes`` is read from the
    notebook-level global.

    Returns the compiled ``Sequential`` model, reporting accuracy.
    """
    layer_stack = [
        Conv1D(filters=1, kernel_size=1, strides=1,
               input_shape=(shape, 1), activation=activation),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(100, activation=activation),
        Dense(n_classes, activation="softmax"),
    ]
    network = Sequential(layer_stack)
    network.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
    return network

def base_model():
    """Build and compile the fully connected baseline classifier.

    Dense(100, relu) -> Dense(50, relu) -> Dense(n_classes, softmax), compiled
    with categorical cross-entropy and the 'adadelta' optimizer. The input
    width ``n_feat`` and ``n_classes`` are read from notebook-level globals.
    """
    net = Sequential()
    dense_layers = (
        Dense(100, input_dim=n_feat, kernel_initializer='normal', activation='relu'),
        Dense(50, activation='relu'),
        Dense(n_classes, kernel_initializer='normal', activation="softmax"),
    )
    for layer in dense_layers:
        net.add(layer)
    net.compile(optimizer='adadelta', loss='categorical_crossentropy', metrics=["accuracy"])
    return net

In [144]:
# Permutation feature importance of the dense baseline model, measured on the
# held-out split.
# The previous version trained and scored one extra model before this loop and
# called eli5.show_weights() on it without displaying the result, so that pass
# only doubled the runtime; the loop below produces the same final `clf`,
# `perm` and `perm_importance_m`.
# NOTE(review): `params` is assigned in the feature-removal experiment cell
# further down, so on a fresh kernel that cell has to run before this one --
# consider moving `params` into the configuration cell.
perm_importance_m = []
for i in range(1):
    clf = KerasClassifier(build_fn=base_model, epochs=params["epochs"], batch_size=params["batch_size"],verbose=0)
    # base_model expects flat (n_samples, n_feat) inputs, so drop the channel axis.
    clf.fit(x_train.reshape(n_train, n_feat),y_train)
    perm = PermutationImportance(clf, random_state=random_state).fit(x_test.reshape(x_test.shape[0], n_feat),y_test)
    perm_importance_m.append(perm)
    display(eli5.show_weights(perm, feature_names = features))

Weight,Feature
0.4098  ± 0.0469,distance
0.2413  ± 0.0272,duration
0.0848  ± 0.0234,percent_car_speed
0.0446  ± 0.0360,percent_bic_speed
0.0239  ± 0.0296,percent_ped_speed
0.0076  ± 0.0111,num_stops
0.0043  ± 0.0043,mean_relative_angle
0.0011  ± 0.0106,max_deaccel
0  ± 0.0000,sinuosity
0  ± 0.0000,msd


In [145]:
def grid_search():
    """Grid-search the simple CNN's hyper-parameters with 3-fold CV.

    Searches optimizer, hidden activation, batch size and epoch count for
    ``create_simple_model`` on the notebook-level ``x_train`` / ``y_train``,
    prints the best estimator and parameters, and returns
    ``(fitted GridSearchCV, best score, best params)``.
    """
    # define the grid search parameters
    search_space = {
        "optimizer": ['SGD', 'Adadelta', 'Adam'],
        "epochs": [10, 50, 100, 200, 300],
        "batch_size": [5, 10, 20, 40, 80],
        "activation": ["relu", "softmax", "sigmoid", "tanh"],
    }
    estimator = KerasClassifier(build_fn=create_simple_model, verbose=0)

    searcher = GridSearchCV(estimator=estimator, param_grid=search_space, n_jobs=6, cv=3)
    searcher.fit(x_train, y_train)

    print(f"Best estimator:{searcher.best_estimator_}\n\nBest params:{searcher.best_params_}")
    return searcher, searcher.best_score_, searcher.best_params_



# Run the search; keep the fitted GridSearchCV object, its best CV score and
# the winning parameter set. With 300 parameter combinations and cv=3 this
# fits 900 models, so expect a long run.
grid_model, grid_acc, grid_params = grid_search()

Best estimator:<keras.wrappers.scikit_learn.KerasClassifier object at 0x7fea4d383588>

Best params:{'activation': 'relu', 'batch_size': 40, 'epochs': 300, 'optimizer': 'Adam'}


In [146]:
# Best score found by the grid search; grid_search() returned this same
# attribute as grid_acc.
grid_model.best_score_

0.9646756329088637

In [None]:
# Feature-removal experiment.
# For each data set ("normal", "smote") and each number of removed features
# (0 .. max_feat_removed-1), draw num_runs_per_iteration random feature subsets,
# train the simple CNN with n_split-fold CV on the training split, and record
# per-feature loss/accuracy plus the overall best model.
n_split=3
params = {
    "epochs":100,
    "batch_size":40,
    "optimizer":"Adadelta",
    "activation":"relu"
}

scores = []
results = {
    "smote":{},
    "normal":{}
}
total_best = {
    "model":0,
    "acc":0,
    "loss":0,
    "features_used":[],
    "data_type":None
}
for t in results:
    for f in features:
        results[t][f] = dict(num_used=0, loss=[], acc=[])
acc_type=[]
json_dump = []
max_feat_removed = 8
num_runs_per_iteration = 5
num_iter = 0
data_type = ["normal", "smote"]
# Derived from data_type instead of a hard-coded 2 so the bar stays correct
# if a data set is added or removed.
total_runs = len(data_type) * max_feat_removed * num_runs_per_iteration
# random.sample() needs a sequence: sampling from a set is rejected by
# Python >= 3.11 and depends on set ordering on older versions.
feature_pool = list(features)
# Make the drawn feature subsets repeatable between runs of this cell.
random.seed(random_state)
for ty in data_type:
    # The split is deterministic (fixed random_state), so build it once per
    # data set instead of once per run.
    source = smote if ty == "smote" else data
    x_train, x_test, y_train, y_test = create_training_data(source, test_size=0.2)
    n_train = x_train.shape[0]
    x_train_frame = pd.DataFrame(x_train.reshape(n_train, n_feat), columns=features)
    for i in range(max_feat_removed):
        for j in range(num_runs_per_iteration):
            sel_feat = random.sample(feature_pool, len(feature_pool) - i)
            x_train_selected = x_train_frame[sel_feat].to_numpy()
            # Restore the trailing channel axis expected by Conv1D.
            x_train_selected = x_train_selected.reshape(x_train_selected.shape[0], x_train_selected.shape[1], 1)

            # [loss, acc, model] of the best fold for this feature subset.
            best_res = [0, 0, 0]
            for train_index, test_index in KFold(n_split).split(x_train_selected):
                x_tr, x_te = x_train_selected[train_index], x_train_selected[test_index]
                y_tr, y_te = y_train[train_index], y_train[test_index]
                m = create_simple_model(optimizer=params["optimizer"], activation=params["activation"], shape=x_tr.shape[1])
                # verbose=0 is the documented "silent" value (was None).
                m.fit(x_tr, y_tr, epochs=params["epochs"], batch_size=params["batch_size"], verbose=0)
                _loss, _acc = m.evaluate(x_te, y_te)
                print(f"Model type: {ty}. loss:{_loss},acc:{_acc}")
                if(best_res[1]<_acc):
                    best_res = [_loss, _acc, m]
                if(total_best["acc"]<_acc):
                    total_best["model"] = m
                    total_best["acc"] = _acc
                    total_best["loss"] = _loss
                    total_best["features_used"] = sel_feat
                    total_best["data_type"] = ty
                acc_type.append({"type":ty,"acc":_acc})
            # NOTE(review): this records the metrics of the *last* fold, not of
            # best_res -- kept as-is; confirm which one is intended.
            # float() guards against NumPy scalar metrics, which json cannot
            # serialise when they are float32.
            json_dump.append(dict(sel_feat=sel_feat, acc=float(_acc), _loss=float(_loss), _type=ty))
            for f in sel_feat:
                results[ty][f]["num_used"]+=1
                results[ty][f]["loss"].append(best_res[0])
                results[ty][f]["acc"].append(best_res[1])
            # Rewritten after every run so partial results survive an interrupt;
            # the with-block closes the file, no explicit close() is needed.
            with open("results/type_results.json", 'w+') as outfile:
                json.dump(json_dump, outfile)
            num_iter += 1
            printProgressBar(num_iter, total_runs)
# Last expression of the cell so the notebook actually displays it.
total_best

Model type: normal. loss:0.25335133978619045,acc:0.9593495920421632
Model type: normal. loss:5.861607103931661,acc:0.6326530615894161
Model type: normal. loss:0.1281241189460365,acc:0.9551020410596108
Model type: normal. loss:0.22914369707185078,acc:0.9308943113660425
Model type: normal. loss:0.17815172185703199,acc:0.9265306127314665
Model type: normal. loss:0.17711577141771512,acc:0.9265306124881822
Model type: normal. loss:0.07657971813123886,acc:0.967479674796748
Model type: normal. loss:3.947293849867217,acc:0.7551020427626006
Model type: normal. loss:0.08950338710327538,acc:0.959183673712672
Model type: normal. loss:0.16901560757702927,acc:0.9268292707156359
Model type: normal. loss:0.35956365627293685,acc:0.9428571433437114
Model type: normal. loss:0.19348127367545148,acc:0.959183673712672
Model type: normal. loss:0.1446792534817525,acc:0.9552845513917566
Model type: normal. loss:3.947818357117322,acc:0.7551020427626006
Model type: normal. loss:0.27627104739753566,acc:0.94693877

In [358]:
def eval_model(model,x_test, y_test):
    """Evaluate ``model`` on the given test data and print its accuracy.

    ``model.evaluate`` is expected to return exactly two values, of which the
    second is printed as the accuracy. Nothing is returned.
    """
    loss_value, accuracy = model.evaluate(x_test, y_test, verbose=1)
    print("Model accuracy: " + str(accuracy))

    
# NOTE(review): `model` is not assigned in any cell visible here -- presumably
# it is left over from another cell or an earlier session; confirm (or pass
# total_best["model"]) before relying on Restart & Run All.
eval_model(model,x_test, y_test)

Model accuracy: 0.6666666865348816


In [360]:
# Inspect the current shape of x_train; create_training_data() produces
# (n_rows, n_features, 1).
x_train.shape

(5, 16, 1)

In [171]:
# NOTE(review): `train_row` / `train_col` are not assigned in any cell visible
# here, and the recorded output below is a shape tuple rather than an array,
# so it looks stale -- presumably a leftover debugging cell; confirm or remove.
x_train.reshape((train_row, train_col))

(334, 16)