In [None]:
# This script takes more than 8 hours to run

In [16]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from dtw import dtw, accelerated_dtw
from numpy.linalg import norm
from joblib import Parallel, delayed
import pickle

In [17]:
class ProcessData:
    """Load one recording's ``*_bio.csv`` file and expose a fixed time window of it.

    The file is read from ``<path><obj_name>/<obj_name>_<obj_number>_bio.csv``
    (``path`` is concatenated as-is, so it must end with a path separator).
    It must contain a ``timestamp`` column plus the ``ele1..ele19`` and
    ``pac1..pac22`` columns used by the accessors.

    Timestamps are shifted so the first row is at 0, and only rows with
    ``1 <= timestamp <= 1 + WINDOW_SIZE`` are kept in ``self.df_bio``.
    """

    # The window logic assumes this many rows per unit of ``timestamp``: the
    # PAC buffer is sized WINDOW_SIZE * 100 * 22 and imputation triggers at
    # exactly one row less than WINDOW_SIZE * 100.
    ROWS_PER_SECOND = 100
    N_ELECTRODES = 19   # columns ele1..ele19
    N_PAC = 22          # columns pac1..pac22

    def __init__(self, path, obj_name, obj_number):
        """Read the CSV for ``obj_name``/``obj_number`` and cut out the window.

        Args:
            path: directory prefix, including the trailing separator.
            obj_name: sub-directory and file-name prefix (e.g. ``'mat15'``).
            obj_number: recording number embedded in the file name.
        """
        self.path = path
        self.obj_name = obj_name
        self.obj_number = obj_number
        self.WINDOW_SIZE = 4

        # prepare file names
        fpath_bio = f'{self.path}{self.obj_name}/{self.obj_name}_{self.obj_number}_bio.csv'

        # read files
        df_bio = pd.read_csv(fpath_bio, index_col=False)
        df_bio.timestamp = df_bio.timestamp - df_bio.timestamp.iloc[0]
        in_window = (df_bio.timestamp >= 1) & (df_bio.timestamp <= 1 + self.WINDOW_SIZE)
        df_bio_temp = df_bio[in_window]

        # impute if small data: when the window is exactly one row short,
        # borrow the row that immediately follows it in the file.
        expected_rows = self.WINDOW_SIZE * self.ROWS_PER_SECOND
        if df_bio_temp.shape[0] == expected_rows - 1:
            # index_col=False yields a default RangeIndex, so the last label
            # plus one is also the position of the next row.
            next_pos = df_bio_temp.index[-1] + 1
            # Guard against the window ending on the file's last row.
            if next_pos < len(df_bio):
                # DataFrame.append was removed in pandas 2.0; concat with a
                # one-row frame also keeps the column dtypes intact.
                df_bio_temp = pd.concat([df_bio_temp, df_bio.iloc[[next_pos]]])
        self.df_bio = df_bio_temp

    def get_electrode_data(self):
        """Return the windowed ``ele1..ele19`` columns as a 2-D array (rows x 19)."""
        elctrd_cols = [f'ele{i}' for i in range(1, self.N_ELECTRODES + 1)]
        return self.df_bio[elctrd_cols].values

    def get_pac_data(self):
        """Return the windowed ``pac1..pac22`` values flattened row by row.

        Returns:
            1-D float array of length ``WINDOW_SIZE * 100 * 22``. If the window
            holds fewer rows than expected, the tail is left as zeros.

        Raises:
            ValueError: if the window holds more rows than fit in the buffer.
        """
        pac_cols = [f'pac{i}' for i in range(1, self.N_PAC + 1)]
        # Row-major flatten reproduces "22 values of row 0, then row 1, ...".
        flat = self.df_bio[pac_cols].to_numpy(dtype=float).ravel()

        expected_len = self.WINDOW_SIZE * self.ROWS_PER_SECOND * self.N_PAC
        if flat.size > expected_len:
            raise ValueError(
                'window holds %d PAC values but at most %d are expected'
                % (flat.size, expected_len)
            )
        pac_values = np.zeros(expected_len)
        pac_values[:flat.size] = flat
        return pac_values

    def get_timestamp(self):
        """Return the windowed, zero-based timestamps as a 1-D array."""
        return self.df_bio.timestamp.values

In [18]:
# Root directory of the recordings. ProcessData concatenates it directly with
# the object name, so the trailing '/' is required.
fpath = '../material_data_Feb/'
# Two sample recordings ('mat15' number 1 and 'mat5' number 2) used below for a
# single trial DTW run before the full pairwise computation.
mat_obj1 = ProcessData(fpath ,'mat15', 1)
mat_obj2 = ProcessData(fpath ,'mat5', 2)

In [26]:
# Sanity check: the flattened PAC vector has WINDOW_SIZE * 100 * 22 = 4 * 100 * 22 = 8800 values.
mat_obj1.get_pac_data().shape

(8800,)

In [136]:
# Keep only the first 1000 values of each flattened PAC vector for the trial
# DTW run; tact_calc_dtw uses the same [:1000] truncation for the full run.
a = mat_obj1.get_pac_data()[:1000]
b = mat_obj2.get_pac_data()[:1000]

In [137]:
%%time
# Trial run for the precomputed distance based on DTW (dynamic time warping).
# normalized_dist (L1 norm of the difference) is defined but not used in this cell.
normalized_dist = lambda x, y: norm(x - y, ord=1)
# Element-wise absolute difference; this callable is also used by tact_calc_dtw,
# so it must be defined before the pairwise computation runs.
manhattan_distance = lambda x, y: np.abs(x - y)
# dtw returns four values; only `d` is inspected afterwards.
# NOTE(review): w=100 is presumably the warping-window size of the dtw package - confirm.
d, cost_matrix, acc_cost_matrix, path = dtw(a, b, dist=manhattan_distance, w=100)

CPU times: user 943 ms, sys: 0 ns, total: 943 ms
Wall time: 941 ms


In [49]:
# NOTE(review): the saved output of this cell (0.0) was produced at execution
# count 49, while the cell that computes `d` was last run at count 137, so the
# displayed value may not correspond to the current `a`/`b` - re-run to confirm.
d

0.0

In [34]:
%%time
big_pac_data = np.zeros([21*50, 8800])
labels = []
count = 0
current_label = -1
for i in range(1,22):
    current_label += 1
    mat_name='mat'+str(i)
    print(mat_name)
    for j in range(1,51):
        mat_obj = ProcessData(fpath ,mat_name, j)
        big_pac_data[count,:] = mat_obj.get_pac_data()
        labels.append(current_label)
        count += 1

mat1
mat2
mat3
mat4
mat5
mat6
mat7
mat8
mat9
mat10
mat11
mat12
mat13
mat14
mat15
mat16
mat17
mat18
mat19
mat20
mat21
CPU times: user 4min 9s, sys: 1.77 s, total: 4min 11s
Wall time: 4min 12s


In [171]:
# Convert the label list to an ndarray. This rebinds `labels`, so the list built
# by the loading cell is no longer available under that name after this cell runs.
labels = np.array(labels)

In [174]:
# Persist the labels; row k of the distance matrix built below corresponds to labels[k].
# The 'auxiliaries/' directory is not created anywhere in the visible cells.
np.save('auxiliaries/labels.npy', labels)

In [41]:
# Expect (21*50, 8800) = (1050, 8800): one row per loaded recording.
big_pac_data.shape

(1050, 8800)

In [None]:
# NOTE(review): scratch cell - it has no execution count, its result is not
# assigned, and no other visible cell uses itertools. Candidate for removal.
import itertools
list(itertools.permutations([1, 2, 3]))

In [143]:
# All index pairs [i, j] with j >= i (upper triangle including the diagonal),
# generated row by row.
big_list_tact = [
    [i, j]
    for i in range(big_pac_data.shape[0])
    for j in range(i, big_pac_data.shape[0])
]

In [144]:
def tact_calc_dtw(_iii, _jjj):
    """Return the DTW result for rows ``_iii`` and ``_jjj`` of ``big_pac_data``.

    Identical indices short-circuit to ``0.0`` without calling ``dtw``.
    Otherwise only the first 1000 values of each row are compared, using the
    module-level ``manhattan_distance`` callable and ``w=100``; the first of
    the four values returned by ``dtw`` is handed back.
    """
    if _iii != _jjj:
        first_series = big_pac_data[_iii, :1000]
        second_series = big_pac_data[_jjj, :1000]
        distance, _, _, _ = dtw(first_series, second_series,
                                dist=manhattan_distance, w=100)
        return distance
    return 0.0

In [145]:
# Evaluate tact_calc_dtw for every [i, j] pair using 25 parallel jobs.
# `res` must stay in the same order as big_list_tact: it is later written
# straight into the np.triu_indices positions of the distance matrix.
res = Parallel(n_jobs=25)(delayed(tact_calc_dtw)(*zz) for zz in big_list_tact)

In [165]:
# Persist the upper-triangle DTW results. The context manager closes (and
# therefore flushes) the file deterministically; the bare open() call left
# that to garbage collection.
with open('auxiliaries/dwt_distance_ut.pk', 'wb') as res_file:
    pickle.dump(res, res_file)

In [148]:
# Number of [i, j] pairs with j >= i for 1050 rows: 1050 * 1051 / 2 = 551775.
len(res)

551775

In [160]:
# Square matrix with one row/column per recording (21*50 = 1050).
tri = np.zeros((21*50, 21*50))
# Fill the upper triangle (diagonal included) with the pairwise results. This
# relies on `res` being ordered like big_list_tact, i.e. row by row with j >= i.
tri[np.triu_indices(21*50)] = res

In [163]:
# Mirror the upper triangle into the lower one to get the full symmetric matrix.
# Built from np.triu so the cell is idempotent: re-running it on an already
# symmetric `tri` returns the same matrix, whereas `tri + tri.transpose()`
# would double every distance. The diagonal is taken once, not added to itself.
tri = np.triu(tri) + np.triu(tri, k=1).T

In [164]:
# Inspect the symmetric distance matrix (zeros are expected on the diagonal,
# since tact_calc_dtw returns 0.0 for identical indices).
tri

array([[    0., 22277., 26341., ..., 43323., 38516., 37782.],
       [22277.,     0., 22852., ..., 43643., 54142., 45221.],
       [26341., 22852.,     0., ..., 62587., 56986., 50774.],
       ...,
       [43323., 43643., 62587., ...,     0., 45945., 37834.],
       [38516., 54142., 56986., ..., 45945.,     0., 22143.],
       [37782., 45221., 50774., ..., 37834., 22143.,     0.]])

In [166]:
# Persist the full symmetric distance matrix. The context manager closes (and
# therefore flushes) the file deterministically; the bare open() call left
# that to garbage collection.
with open('auxiliaries/dwt_distance_full.pk', 'wb') as tri_file:
    pickle.dump(tri, tri_file)