# ExtendedMD (EMD) algorithm - Experiments with lateral acceleration in one trip

## Maria Inês Silva
## 01/02/2019

***

## Data and library imports

In [None]:
import pickle
import time

import numpy as np
import pandas as pd

import extendedMD.emd as emd
from extendedMD.pruning import prune_motifs_with_mdl
from extendedMD.viz import create_motif_table, plot_k_motifs

# local code
import sys
import os
sys.path.insert(0, os.path.abspath('../scripts'))
import dtw_clustering as clust

In [None]:
# Locate the input and output folders, both siblings of the working directory.
cwd = os.getcwd()
parent_dir = os.path.join(cwd, os.pardir)
data_folder = os.path.abspath(os.path.join(parent_dir, 'data-uah'))
output_folder = os.path.abspath(os.path.join(parent_dir, 'data-motifs'))
# Running experiment counter used to build the output file names.
exp = 1

In [None]:
# Load the pickled trip DataFrame and pull out the two columns used below:
# 'ay' is the series fed to the motif search and 'event_type' is passed to
# the plotting helper.
trip_path = os.path.join(data_folder, 'trip_df.pkl')
trip_df = pd.read_pickle(trip_path)
ts = trip_df['ay'].values
events_ts = trip_df['event_type'].values

## Experiment 1

In [None]:
# Experiment 1 settings. These values are forwarded to
# emd.find_motifs_from_emd (and R also to prune_motifs_with_mdl) in the
# following cells.
# Pin the experiment id here so that re-running this cell, or running the
# experiments out of order, always yields the same output file names.
exp = 1
R = 0.2
win_size = 15
paa_size = 3
alphabet_size = 5
adaptive_break_points = True

motif_file_name = 'motif_lat_acc_trip_exp' + str(exp) + '.p'
dist_file_name = 'dist_lat_acc_trip_exp' + str(exp) + '.p'
# Keep advancing the counter so later cells that still rely on it see the
# same value as in a fresh top-to-bottom run.
exp += 1

In [None]:
# Run the ExtendedMD motif search, persist the resulting motif list and
# report the elapsed wall-clock time (the timing includes the pickle write).
start_time = time.time()
motif_dic_list, ts_1d = emd.find_motifs_from_emd(ts, R, win_size, paa_size, alphabet_size, adaptive_break_points)
# A context manager guarantees the output file is flushed and closed.
with open(os.path.join(output_folder, motif_file_name), "wb") as motif_file:
    pickle.dump(motif_dic_list, motif_file)
print("ExtendedMD algorithm run in {} minutes".format(round((time.time() - start_time)/60, 2)))

# Display the ten motifs with the lowest 'mdl_cost'.
motif_df = create_motif_table(motif_dic_list)
motif_df.sort_values('mdl_cost').head(10)

In [None]:
# Prune the motif list with prune_motifs_with_mdl, tabulate the survivors and
# report how long both steps took.
start_time = time.time()
pruned_motif_dic_list = prune_motifs_with_mdl(ts, motif_dic_list, R)
pruned_motif_df = create_motif_table(pruned_motif_dic_list)
elapsed_minutes = round((time.time() - start_time)/60, 2)
print("Prunning algorithm run in {} minutes".format(elapsed_minutes))
pruned_motif_df

In [None]:
# Plot every motif that survived pruning (one per row of pruned_motif_df).
n_pruned_motifs = len(pruned_motif_df)
plot_k_motifs(n_pruned_motifs, ts, events_ts, pruned_motif_dic_list, 'lateral acceleration')

In [None]:
# For each (unpruned) motif, gather the series values at its
# 'center_ts_pointers', then compute the pairwise distance matrix between
# those centre subsequences and store it on disk.
center_ts_list = [
    [ts[pointer] for pointer in motif_dic['center_ts_pointers']]
    for motif_dic in motif_dic_list
]
center_dist_mat = clust.compute_dtw_distance_matrix(center_ts_list, parallel=True)
# NOTE: np.save appends '.npy' when the file name does not already end with
# it, so the file written to disk is '<dist_file_name>.npy'.
np.save(os.path.join(output_folder, dist_file_name), center_dist_mat)

## Experiment 2

In [None]:
# Experiment 2 settings. These values are forwarded to
# emd.find_motifs_from_emd (and R also to prune_motifs_with_mdl) in the
# following cells.
# Pin the experiment id here so that re-running this cell, or running the
# experiments out of order, always yields the same output file names.
exp = 2
R = 0.2
win_size = 20
paa_size = 4
alphabet_size = 5
adaptive_break_points = True

motif_file_name = 'motif_lat_acc_trip_exp' + str(exp) + '.p'
dist_file_name = 'dist_lat_acc_trip_exp' + str(exp) + '.p'
# Keep advancing the counter so later cells that still rely on it see the
# same value as in a fresh top-to-bottom run.
exp += 1

In [None]:
# Run the ExtendedMD motif search, persist the resulting motif list and
# report the elapsed wall-clock time (the timing includes the pickle write).
start_time = time.time()
motif_dic_list, ts_1d = emd.find_motifs_from_emd(ts, R, win_size, paa_size, alphabet_size, adaptive_break_points)
# A context manager guarantees the output file is flushed and closed.
with open(os.path.join(output_folder, motif_file_name), "wb") as motif_file:
    pickle.dump(motif_dic_list, motif_file)
print("ExtendedMD algorithm run in {} minutes".format(round((time.time() - start_time)/60, 2)))

# Display the ten motifs with the lowest 'mdl_cost'.
motif_df = create_motif_table(motif_dic_list)
motif_df.sort_values('mdl_cost').head(10)

In [None]:
# Prune the motif list with prune_motifs_with_mdl, tabulate the survivors and
# report how long both steps took.
start_time = time.time()
pruned_motif_dic_list = prune_motifs_with_mdl(ts, motif_dic_list, R)
pruned_motif_df = create_motif_table(pruned_motif_dic_list)
elapsed_minutes = round((time.time() - start_time)/60, 2)
print("Prunning algorithm run in {} minutes".format(elapsed_minutes))
pruned_motif_df

In [None]:
# Plot every motif that survived pruning (one per row of pruned_motif_df).
n_pruned_motifs = len(pruned_motif_df)
plot_k_motifs(n_pruned_motifs, ts, events_ts, pruned_motif_dic_list, 'lateral acceleration')

In [None]:
# For each (unpruned) motif, gather the series values at its
# 'center_ts_pointers', then compute the pairwise distance matrix between
# those centre subsequences and store it on disk.
center_ts_list = [
    [ts[pointer] for pointer in motif_dic['center_ts_pointers']]
    for motif_dic in motif_dic_list
]
center_dist_mat = clust.compute_dtw_distance_matrix(center_ts_list, parallel=True)
# NOTE: np.save appends '.npy' when the file name does not already end with
# it, so the file written to disk is '<dist_file_name>.npy'.
np.save(os.path.join(output_folder, dist_file_name), center_dist_mat)

## Experiment 3

In [None]:
# Experiment 3 settings. These values are forwarded to
# emd.find_motifs_from_emd (and R also to prune_motifs_with_mdl) in the
# following cells.
# Pin the experiment id here so that re-running this cell, or running the
# experiments out of order, always yields the same output file names.
exp = 3
R = 0.2
win_size = 20
paa_size = 2
alphabet_size = 5
adaptive_break_points = True

motif_file_name = 'motif_lat_acc_trip_exp' + str(exp) + '.p'
dist_file_name = 'dist_lat_acc_trip_exp' + str(exp) + '.p'
# Keep advancing the counter so later cells that still rely on it see the
# same value as in a fresh top-to-bottom run.
exp += 1

In [None]:
# Run the ExtendedMD motif search, persist the resulting motif list and
# report the elapsed wall-clock time (the timing includes the pickle write).
start_time = time.time()
motif_dic_list, ts_1d = emd.find_motifs_from_emd(ts, R, win_size, paa_size, alphabet_size, adaptive_break_points)
# A context manager guarantees the output file is flushed and closed.
with open(os.path.join(output_folder, motif_file_name), "wb") as motif_file:
    pickle.dump(motif_dic_list, motif_file)
print("ExtendedMD algorithm run in {} minutes".format(round((time.time() - start_time)/60, 2)))

# Display the ten motifs with the lowest 'mdl_cost'.
motif_df = create_motif_table(motif_dic_list)
motif_df.sort_values('mdl_cost').head(10)

In [None]:
# Prune the motif list with prune_motifs_with_mdl, tabulate the survivors and
# report how long both steps took.
start_time = time.time()
pruned_motif_dic_list = prune_motifs_with_mdl(ts, motif_dic_list, R)
pruned_motif_df = create_motif_table(pruned_motif_dic_list)
elapsed_minutes = round((time.time() - start_time)/60, 2)
print("Prunning algorithm run in {} minutes".format(elapsed_minutes))
pruned_motif_df

In [None]:
# Plot every motif that survived pruning (one per row of pruned_motif_df).
n_pruned_motifs = len(pruned_motif_df)
plot_k_motifs(n_pruned_motifs, ts, events_ts, pruned_motif_dic_list, 'lateral acceleration')

In [None]:
# For each (unpruned) motif, gather the series values at its
# 'center_ts_pointers', then compute the pairwise distance matrix between
# those centre subsequences and store it on disk.
center_ts_list = [
    [ts[pointer] for pointer in motif_dic['center_ts_pointers']]
    for motif_dic in motif_dic_list
]
center_dist_mat = clust.compute_dtw_distance_matrix(center_ts_list, parallel=True)
# NOTE: np.save appends '.npy' when the file name does not already end with
# it, so the file written to disk is '<dist_file_name>.npy'.
np.save(os.path.join(output_folder, dist_file_name), center_dist_mat)

## Experiment 4

In [None]:
# Experiment 4 settings. These values are forwarded to
# emd.find_motifs_from_emd (and R also to prune_motifs_with_mdl) in the
# following cells.
# Pin the experiment id here so that re-running this cell, or running the
# experiments out of order, always yields the same output file names.
exp = 4
R = 0.2
win_size = 15
paa_size = 3
alphabet_size = 7
adaptive_break_points = False

motif_file_name = 'motif_lat_acc_trip_exp' + str(exp) + '.p'
dist_file_name = 'dist_lat_acc_trip_exp' + str(exp) + '.p'
# Keep advancing the counter so later cells that still rely on it see the
# same value as in a fresh top-to-bottom run.
exp += 1

In [None]:
# Run the ExtendedMD motif search, persist the resulting motif list and
# report the elapsed wall-clock time (the timing includes the pickle write).
start_time = time.time()
motif_dic_list, ts_1d = emd.find_motifs_from_emd(ts, R, win_size, paa_size, alphabet_size, adaptive_break_points)
# A context manager guarantees the output file is flushed and closed.
with open(os.path.join(output_folder, motif_file_name), "wb") as motif_file:
    pickle.dump(motif_dic_list, motif_file)
print("ExtendedMD algorithm run in {} minutes".format(round((time.time() - start_time)/60, 2)))

# Display the ten motifs with the lowest 'mdl_cost'.
motif_df = create_motif_table(motif_dic_list)
motif_df.sort_values('mdl_cost').head(10)

In [None]:
# Prune the motif list with prune_motifs_with_mdl, tabulate the survivors and
# report how long both steps took.
start_time = time.time()
pruned_motif_dic_list = prune_motifs_with_mdl(ts, motif_dic_list, R)
pruned_motif_df = create_motif_table(pruned_motif_dic_list)
elapsed_minutes = round((time.time() - start_time)/60, 2)
print("Prunning algorithm run in {} minutes".format(elapsed_minutes))
pruned_motif_df

In [None]:
# Plot every motif that survived pruning (one per row of pruned_motif_df).
n_pruned_motifs = len(pruned_motif_df)
plot_k_motifs(n_pruned_motifs, ts, events_ts, pruned_motif_dic_list, 'lateral acceleration')

In [None]:
# For each (unpruned) motif, gather the series values at its
# 'center_ts_pointers', then compute the pairwise distance matrix between
# those centre subsequences and store it on disk.
center_ts_list = [
    [ts[pointer] for pointer in motif_dic['center_ts_pointers']]
    for motif_dic in motif_dic_list
]
center_dist_mat = clust.compute_dtw_distance_matrix(center_ts_list, parallel=True)
# NOTE: np.save appends '.npy' when the file name does not already end with
# it, so the file written to disk is '<dist_file_name>.npy'.
np.save(os.path.join(output_folder, dist_file_name), center_dist_mat)

## Experiment 5

In [None]:
# Experiment 5 settings. These values are forwarded to
# emd.find_motifs_from_emd (and R also to prune_motifs_with_mdl) in the
# following cells.
# Pin the experiment id here so that re-running this cell, or running the
# experiments out of order, always yields the same output file names.
exp = 5
R = 0.2
win_size = 20
paa_size = 4
alphabet_size = 7
adaptive_break_points = False

motif_file_name = 'motif_lat_acc_trip_exp' + str(exp) + '.p'
dist_file_name = 'dist_lat_acc_trip_exp' + str(exp) + '.p'
# Keep advancing the counter so later cells that still rely on it see the
# same value as in a fresh top-to-bottom run.
exp += 1

In [None]:
# Run the ExtendedMD motif search, persist the resulting motif list and
# report the elapsed wall-clock time (the timing includes the pickle write).
start_time = time.time()
motif_dic_list, ts_1d = emd.find_motifs_from_emd(ts, R, win_size, paa_size, alphabet_size, adaptive_break_points)
# A context manager guarantees the output file is flushed and closed.
with open(os.path.join(output_folder, motif_file_name), "wb") as motif_file:
    pickle.dump(motif_dic_list, motif_file)
print("ExtendedMD algorithm run in {} minutes".format(round((time.time() - start_time)/60, 2)))

# Display the ten motifs with the lowest 'mdl_cost'.
motif_df = create_motif_table(motif_dic_list)
motif_df.sort_values('mdl_cost').head(10)

In [None]:
# Prune the motif list with prune_motifs_with_mdl, tabulate the survivors and
# report how long both steps took.
start_time = time.time()
pruned_motif_dic_list = prune_motifs_with_mdl(ts, motif_dic_list, R)
pruned_motif_df = create_motif_table(pruned_motif_dic_list)
elapsed_minutes = round((time.time() - start_time)/60, 2)
print("Prunning algorithm run in {} minutes".format(elapsed_minutes))
pruned_motif_df

In [None]:
# Plot every motif that survived pruning (one per row of pruned_motif_df).
n_pruned_motifs = len(pruned_motif_df)
plot_k_motifs(n_pruned_motifs, ts, events_ts, pruned_motif_dic_list, 'lateral acceleration')

In [None]:
# For each (unpruned) motif, gather the series values at its
# 'center_ts_pointers', then compute the pairwise distance matrix between
# those centre subsequences and store it on disk.
center_ts_list = [
    [ts[pointer] for pointer in motif_dic['center_ts_pointers']]
    for motif_dic in motif_dic_list
]
center_dist_mat = clust.compute_dtw_distance_matrix(center_ts_list, parallel=True)
# NOTE: np.save appends '.npy' when the file name does not already end with
# it, so the file written to disk is '<dist_file_name>.npy'.
np.save(os.path.join(output_folder, dist_file_name), center_dist_mat)

## Experiment 6

In [None]:
# Experiment 6 settings. These values are forwarded to
# emd.find_motifs_from_emd (and R also to prune_motifs_with_mdl) in the
# following cells.
# Pin the experiment id here so that re-running this cell, or running the
# experiments out of order, always yields the same output file names.
exp = 6
R = 0.2
win_size = 20
paa_size = 2
alphabet_size = 7
adaptive_break_points = False

motif_file_name = 'motif_lat_acc_trip_exp' + str(exp) + '.p'
dist_file_name = 'dist_lat_acc_trip_exp' + str(exp) + '.p'
# Keep advancing the counter so any later cell that still relies on it sees
# the same value as in a fresh top-to-bottom run.
exp += 1

In [None]:
# Run the ExtendedMD motif search, persist the resulting motif list and
# report the elapsed wall-clock time (the timing includes the pickle write).
start_time = time.time()
motif_dic_list, ts_1d = emd.find_motifs_from_emd(ts, R, win_size, paa_size, alphabet_size, adaptive_break_points)
# A context manager guarantees the output file is flushed and closed.
with open(os.path.join(output_folder, motif_file_name), "wb") as motif_file:
    pickle.dump(motif_dic_list, motif_file)
print("ExtendedMD algorithm run in {} minutes".format(round((time.time() - start_time)/60, 2)))

# Display the ten motifs with the lowest 'mdl_cost'.
motif_df = create_motif_table(motif_dic_list)
motif_df.sort_values('mdl_cost').head(10)

In [None]:
# Prune the motif list with prune_motifs_with_mdl, tabulate the survivors and
# report how long both steps took.
start_time = time.time()
pruned_motif_dic_list = prune_motifs_with_mdl(ts, motif_dic_list, R)
pruned_motif_df = create_motif_table(pruned_motif_dic_list)
elapsed_minutes = round((time.time() - start_time)/60, 2)
print("Prunning algorithm run in {} minutes".format(elapsed_minutes))
pruned_motif_df

In [None]:
# Plot every motif that survived pruning (one per row of pruned_motif_df).
n_pruned_motifs = len(pruned_motif_df)
plot_k_motifs(n_pruned_motifs, ts, events_ts, pruned_motif_dic_list, 'lateral acceleration')

In [None]:
# For each (unpruned) motif, gather the series values at its
# 'center_ts_pointers', then compute the pairwise distance matrix between
# those centre subsequences and store it on disk.
center_ts_list = [
    [ts[pointer] for pointer in motif_dic['center_ts_pointers']]
    for motif_dic in motif_dic_list
]
center_dist_mat = clust.compute_dtw_distance_matrix(center_ts_list, parallel=True)
# NOTE: np.save appends '.npy' when the file name does not already end with
# it, so the file written to disk is '<dist_file_name>.npy'.
np.save(os.path.join(output_folder, dist_file_name), center_dist_mat)