## Loading the data and Initializing the DTW object
#### Load and inspect data

In [1]:
import libdtw as lib
import model as mod

from tqdm import tqdm
import os
from copy import deepcopy
from sklearn.ensemble import RandomForestRegressor
import numpy as np
import pandas as pd

  from numpy.core.umath_tests import inner1d


In [2]:
data = lib.load_data(data_path="data/ope3_26.pickle", n_to_keep=10)
print(data.keys())
print('Reference: %s'%data['reference'])

dict_keys(['5153', '5214', '5242', '5245', '5250', '5269', '5300', '5341', '5342', '5426', 'reference'])
Reference: 5153


In [3]:
D = lib.Dtw(json_obj=data, random_weights=True, scaling='group')

In [4]:
D.data.keys()

dict_keys(['ref_id', 'reference', 'queries', 'num_queries', 'warpings', 'distances', 'warp_dist', 'queriesID', 'time_distortion', 'distance_distortion', 'warpings_per_step_pattern', 'feat_weights'])

In [5]:
D.data_open_ended.keys()

dict_keys(['ref_id', 'reference', 'queries', 'warp_dist'])

#### Calling DTW on a particular query

In [6]:
D.call_dtw(query_id='5214', step_pattern='symmetricP05', get_results=False, open_ended=False)

In [7]:
D.data['warpings'].keys()

dict_keys(['5214'])

In [8]:
print(D.data['warpings']) # warpings refer to the last step pattern utilized

{'5214': [(0, 0), (1, 1), (2, 1), (3, 1), (4, 2), (5, 3), (6, 4), (7, 5), (8, 6), (9, 7), (10, 8), (11, 9), (12, 10), (13, 11), (14, 12), (15, 13), (16, 14), (17, 15), (18, 16), (19, 17), (20, 18), (21, 19), (22, 20), (23, 21), (24, 22), (25, 23), (26, 24), (27, 25), (28, 26), (29, 27), (30, 28), (31, 29), (32, 30), (33, 31), (34, 32), (35, 33), (36, 34), (37, 35), (38, 36), (39, 37), (40, 38), (41, 39), (42, 40), (43, 41), (44, 42), (45, 43), (46, 44), (47, 45), (48, 46), (49, 47), (50, 48), (51, 49), (52, 50), (53, 51), (54, 52), (55, 53), (56, 54), (57, 55), (58, 56), (59, 57), (60, 58), (61, 59), (62, 60), (63, 61), (64, 62), (65, 63), (66, 64), (67, 65), (68, 66), (69, 67), (70, 68), (71, 69), (72, 70), (73, 71), (74, 72), (75, 73), (76, 74), (77, 75), (78, 76), (79, 77), (80, 78), (81, 79), (82, 80), (83, 81), (84, 82), (85, 83), (86, 84), (87, 85), (88, 86), (89, 87), (90, 88), (91, 89), (92, 90), (93, 91), (94, 92), (95, 93), (96, 94), (97, 95), (98, 96), (99, 97), (100, 98), (

In [9]:
D.data['warpings_per_step_pattern'].keys()

dict_keys(['symmetricP05'])

#### Calling DTW on every query

In [10]:
for query_id in D.data['queriesID']:
    D.call_dtw(query_id=query_id, step_pattern='symmetricP05', get_results=False, open_ended=False)

In [11]:
D.data['warpings'].keys()

dict_keys(['5214', '5242', '5245', '5250', '5269', '5300', '5341', '5342', '5426'])

In [12]:
D.data['time_distortion']

defaultdict(dict,
            {'symmetricP05': {'5214': 2,
              '5242': 2,
              '5245': 3,
              '5250': 1,
              '5269': 3,
              '5300': 0,
              '5341': 2,
              '5342': 1,
              '5426': 3}})

In [13]:
D.data['distance_distortion']

defaultdict(dict,
            {'symmetricP05': {'5214': 83.38148620490398,
              '5242': 273.78300323109823,
              '5245': 287.5751649603823,
              '5250': 318.6153892723092,
              '5269': 528.497676498432,
              '5300': 696.0239426645419,
              '5341': 1203.0544587646134,
              '5342': 1207.4919716975749,
              '5426': 195.87826934880425}})

## Variables weights optimization

## Step pattern selection

## Prediction

In [14]:
for _id in D.data['queriesID']:
        D.call_dtw(_id, step_pattern='symmetricP05', n_jobs=1, open_ended=True, all_sub_seq=True)

In [15]:
D.data_open_ended['warp_dist'].keys()

dict_keys(['5214', '5242', '5245', '5250', '5269', '5300', '5341', '5342', '5426'])

In [16]:
D.data_open_ended['warp_dist']['5242']

[(0, 0, 137.23301364914593),
 (1, 1, 205.849314208922),
 (1, 2, 219.572589579756),
 (1, 3, 228.7214428384132),
 (2, 4, 240.15745837589157),
 (2, 5, 243.96949193258692),
 (2, 6, 247.01910460943418),
 (3, 7, 251.59350590407257),
 (3, 8, 253.35289425172792),
 (3, 9, 254.86095013380032),
 (4, 10, 257.31151101653825),
 (4, 11, 258.32057610568614),
 (4, 12, 259.21759683855385),
 (5, 13, 260.7424560175771),
 (5, 14, 261.3960666624641),
 (5, 15, 261.9902210913359),
 (6, 16, 263.02986980299767),
 (6, 17, 263.4874104079045),
 (6, 18, 263.9097255637657),
 (7, 19, 264.6637511900861),
 (7, 20, 265.00184211864195),
 (7, 21, 265.31734135853344),
 (8, 22, 265.88911154337603),
 (8, 23, 266.1490451133189),
 (8, 24, 266.39369171371703),
 (9, 25, 266.84215482418296),
 (9, 26, 267.0482481415926),
 (9, 27, 267.2435105456386),
 (10, 28, 267.6046535220883),
 (10, 29, 267.7720646446803),
 (10, 30, 267.93151627819293),
 (11, 31, 268.2285675071971),
 (11, 32, 268.3672465431694),
 (11, 33, 268.4999060697552),
 (1

In [17]:
x, y = mod.generate_dataset_xy(2, 6, '5242', D, data)

In [18]:
x

Unnamed: 0_level_0,DTW_distance,length,ba_FQYXdr6Q0,ba_FC3BnZ,ba_FmYOrhonkk,ba_PCPUSq5ah,ba_SinhbKA,ba_TIJSj,ba_TrWfpiPrs,ba_TC06nRC,...,ba_FCGUglDXyPKrnk38,ba_FCDvhG4,ba_TDZyHMh1UZ,ba_TDZV2zDOrmiYP,ba_PfrAayv8,ba_FQC6tzztBV4DeXemfp,ba_TZv9w2dOu,ba_TZIm086h,ba_TZHx41ABpBbhN,ba_TZWZNzFFdHb
query_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5242,247.019105,7,2072.264,0.0,5.366,904.98,34.9854,90.674,15.271,90.6,...,1300.0,0.0,31.3,32.0,8.379,114384.0,91.4,91.7,59.8,59.2
5214,73.252153,5,832.497,0.0,5.554,804.492,35.8643,88.11,23.73,87.8,...,1300.0,0.0,28.3,27.3,8.237,114385.0,88.5,89.0,59.8,60.2
5214,74.414864,6,832.497,0.0,5.554,804.492,35.8643,88.11,23.73,87.8,...,1300.0,0.0,28.3,27.3,8.237,114385.0,88.5,89.0,59.8,60.2
5214,75.345052,7,832.497,0.0,5.554,804.492,35.8643,88.11,23.73,87.8,...,1300.0,0.0,28.3,27.3,8.237,114385.0,88.5,89.0,59.8,60.2
5242,73.252153,5,2072.264,0.0,5.366,904.98,34.9854,90.674,15.271,90.6,...,1300.0,0.0,31.3,32.0,8.379,114384.0,91.4,91.7,59.8,59.2
5242,74.414864,6,2072.264,0.0,5.366,904.98,34.9854,90.674,15.271,90.6,...,1300.0,0.0,31.3,32.0,8.379,114384.0,91.4,91.7,59.8,59.2
5242,75.345052,7,2072.264,0.0,5.366,904.98,34.9854,90.674,15.271,90.6,...,1300.0,0.0,31.3,32.0,8.379,114384.0,91.4,91.7,59.8,59.2
5245,252.281737,5,2162.246,0.0,9.365,806.25,35.376,86.353,13.22,88.6,...,1300.0,0.0,28.7,29.3,8.389,114383.0,88.4,88.5,59.6,59.2
5245,256.286155,6,2162.246,0.0,9.365,806.25,35.376,86.353,13.22,88.6,...,1300.0,0.0,28.7,29.3,8.389,114383.0,88.4,88.5,59.6,59.2
5245,259.489694,7,2162.246,0.0,9.365,806.25,35.376,86.353,13.22,88.6,...,1300.0,0.0,28.7,29.3,8.389,114383.0,88.4,88.5,59.6,59.2


In [19]:
y

array([( True, 406.), ( True, 408.), ( True, 407.), ( True, 406.),
       ( True, 408.), ( True, 407.), ( True, 406.), ( True, 413.),
       ( True, 412.), ( True, 411.), ( True, 411.), ( True, 410.),
       ( True, 409.), ( True, 407.), ( True, 406.), ( True, 405.),
       ( True, 410.), ( True, 409.), ( True, 408.), ( True, 408.),
       ( True, 407.), ( True, 406.), ( True, 411.), ( True, 410.),
       ( True, 409.), ( True, 407.), ( True, 406.), ( True, 405.)],
      dtype=[('status', '?'), ('time_remaining', '<f8')])

### Need some work to get train and test set

In [20]:
loc = x.index.get_loc('5242')
loc

array([ True, False, False, False,  True,  True,  True, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False])

In [21]:
x_train = x.loc[x.index != '5242', :]
x_test = pd.DataFrame(x.loc['5242'])

y_test = y[loc]
y_train = np.delete(y, loc)

  """


In [22]:
x_train

Unnamed: 0_level_0,DTW_distance,length,ba_FQYXdr6Q0,ba_FC3BnZ,ba_FmYOrhonkk,ba_PCPUSq5ah,ba_SinhbKA,ba_TIJSj,ba_TrWfpiPrs,ba_TC06nRC,...,ba_FCGUglDXyPKrnk38,ba_FCDvhG4,ba_TDZyHMh1UZ,ba_TDZV2zDOrmiYP,ba_PfrAayv8,ba_FQC6tzztBV4DeXemfp,ba_TZv9w2dOu,ba_TZIm086h,ba_TZHx41ABpBbhN,ba_TZWZNzFFdHb
query_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5214,73.252153,5,832.497,0.0,5.554,804.492,35.8643,88.11,23.73,87.8,...,1300.0,0.0,28.3,27.3,8.237,114385.0,88.5,89.0,59.8,60.2
5214,74.414864,6,832.497,0.0,5.554,804.492,35.8643,88.11,23.73,87.8,...,1300.0,0.0,28.3,27.3,8.237,114385.0,88.5,89.0,59.8,60.2
5214,75.345052,7,832.497,0.0,5.554,804.492,35.8643,88.11,23.73,87.8,...,1300.0,0.0,28.3,27.3,8.237,114385.0,88.5,89.0,59.8,60.2
5245,252.281737,5,2162.246,0.0,9.365,806.25,35.376,86.353,13.22,88.6,...,1300.0,0.0,28.7,29.3,8.389,114383.0,88.4,88.5,59.6,59.2
5245,256.286155,6,2162.246,0.0,9.365,806.25,35.376,86.353,13.22,88.6,...,1300.0,0.0,28.7,29.3,8.389,114383.0,88.4,88.5,59.6,59.2
5245,259.489694,7,2162.246,0.0,9.365,806.25,35.376,86.353,13.22,88.6,...,1300.0,0.0,28.7,29.3,8.389,114383.0,88.4,88.5,59.6,59.2
5250,279.394296,5,2363.822,0.0,5.127,840.82,35.8643,88.916,18.054,88.8,...,1300.0,0.0,29.1,30.1,8.384,114383.0,89.0,89.4,59.2,59.3
5250,283.829096,6,2363.822,0.0,5.127,840.82,35.8643,88.916,18.054,88.8,...,1300.0,0.0,29.1,30.1,8.384,114383.0,89.0,89.4,59.2,59.3
5250,287.376937,7,2363.822,0.0,5.127,840.82,35.8643,88.916,18.054,88.8,...,1300.0,0.0,29.1,30.1,8.384,114383.0,89.0,89.4,59.2,59.3
5269,463.268072,5,3730.081,0.0,5.435,894.141,35.2295,84.741,16.626,89.9,...,1300.0,0.0,30.4,30.8,8.198,114382.0,89.9,91.2,60.0,59.8


In [23]:
x_test.T

query_id,5242,5242.1,5242.2,5242.3
DTW_distance,247.019105,73.252153,74.414864,75.345052
length,7.0,5.0,6.0,7.0
ba_FQYXdr6Q0,2072.264,2072.264,2072.264,2072.264
ba_FC3BnZ,0.0,0.0,0.0,0.0
ba_FmYOrhonkk,5.366,5.366,5.366,5.366
ba_PCPUSq5ah,904.98,904.98,904.98,904.98
ba_SinhbKA,34.9854,34.9854,34.9854,34.9854
ba_TIJSj,90.674,90.674,90.674,90.674
ba_TrWfpiPrs,15.271,15.271,15.271,15.271
ba_TC06nRC,90.6,90.6,90.6,90.6


In [24]:
y_train

array([( True, 407.), ( True, 406.), ( True, 408.), ( True, 407.),
       ( True, 406.), ( True, 413.), ( True, 412.), ( True, 411.),
       ( True, 411.), ( True, 410.), ( True, 409.), ( True, 407.),
       ( True, 406.), ( True, 405.), ( True, 410.), ( True, 409.),
       ( True, 408.), ( True, 408.), ( True, 407.), ( True, 406.),
       ( True, 411.), ( True, 410.), ( True, 409.), ( True, 407.),
       ( True, 406.), ( True, 405.)],
      dtype=[('status', '?'), ('time_remaining', '<f8')])

In [25]:
y_test

array([( True, 406.), ( True, 408.), ( True, 407.), ( True, 406.)],
      dtype=[('status', '?'), ('time_remaining', '<f8')])

### Fitting and scoring the model

In [26]:
lm = mod.Estimator(dtw_obj=D).fit(x_train, y_train) # LinearRegression as standard model

ValueError: Found input variables with inconsistent numbers of samples: [24, 26, 26]

In [None]:
lm.score(x_test.T, y_test)

In [None]:
rf = mod.Estimator(dtw_obj=D, regressor=RandomForestRegressor()).fit(x_train, y_train)

In [None]:
rf.score(x_test.T, y_test)