In [1]:
# Place every point on the map of last-seen non-stationary points, excluding the point itself.
# Fold information is actually not necessary here.
import os
from tqdm import tqdm
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
# List the files shipped with the input dataset.
print(os.listdir("../input/eyn-original"))

# Coordinate bounds (not referenced by any of the cells visible in this notebook).
x_min = -1.
x_max = 1.
y_min = -.3
y_max = .3

# Number of nearest neighbours requested from the KD-tree for every query point.
QUERY_SIZE = 10

['train_data.npy', 'trn_index_list.npy', 'custom.css', 'val_index_list.npy', '__results__.html', 'train_targets_inside.npy', 'submission.csv', 'train_targets.npy', 'test_ids.npy', '__output__.json', 'test_data.npy', '__notebook__.ipynb']


In [2]:
# Load the raw train/test feature arrays and the two training target arrays.
train_data = np.load("../input/eyn-original/train_data.npy")
test_data = np.load("../input/eyn-original/test_data.npy")
train_targets = np.load("../input/eyn-original/train_targets.npy")
train_targets_inside = np.load("../input/eyn-original/train_targets_inside.npy")
# Positions of the training rows whose `train_targets_inside` value equals 1.
train_targets_inside_indexes = np.argwhere(train_targets_inside == 1)
print(train_data.shape, train_targets.shape)
print(train_targets_inside.shape, train_targets_inside_indexes.shape)
print(test_data.shape)
# Feature order along the last axis of train_data / test_data:
# 't_entry','t_exit','x_entry','y_entry','x_exit','y_exit','vmax','vmin','vmean','tid_0','tid_1'

(134063, 21, 11) (134063, 2)
(134063,) (36380, 1)
(33515, 21, 11)


In [3]:
# the queries
# Every query point is 4-D: the (x, y) position followed by the two time columns
# (t_entry, t_exit) scaled down by TIME_SCALE. Column positions follow the
# feature-order comment in the loading cell: 0-1 times, 2-3 entry x/y, 4-5 exit x/y.
TIME_SCALE = 0.0001       # weight applied to the two time columns
JITTER_SCALE = 0.000001   # standard deviation of the noise added to the train entry times
JITTER_SEED = 0           # fixed seed so the jitter is identical on every run

# A private, seeded generator keeps the jitter (and hence every neighbour list derived
# from it) reproducible under Restart & Run All, and leaves the global NumPy RNG untouched.
jitter_rng = np.random.RandomState(JITTER_SEED)

train_time_scaled = train_data[:,:,0:2]*TIME_SCALE
test_time_scaled = test_data[:,:,0:2]*TIME_SCALE

# Only the train entry points are jittered — presumably so that coincident points become
# distinct and each reference point is its own unique nearest neighbour (TODO confirm).
train_entry_loc = np.concatenate((train_data[:,:,2:4],
                                  train_time_scaled +
                                  jitter_rng.randn(*train_time_scaled.shape)*JITTER_SCALE), axis=2)
train_exit_loc = np.concatenate((train_data[:,:,4:6], train_time_scaled), axis=2)
test_entry_loc = np.concatenate((test_data[:,:,2:4], test_time_scaled), axis=2)
test_exit_loc = np.concatenate((test_data[:,:,4:6], test_time_scaled), axis=2)
print(train_entry_loc.shape, train_exit_loc.shape)
print(test_entry_loc.shape, test_exit_loc.shape)

(134063, 21, 4) (134063, 21, 4)
(33515, 21, 4) (33515, 21, 4)


In [4]:
# building the reference
# Indices into the training set; going by the file name these are presumably the
# sequences whose last-seen segment is not stationary — TODO confirm.
train_last_nonstat_indexes = np.load("../input/eyn-folds/train_last_not_stationary.npy")
print(train_last_nonstat_indexes.shape)
# Reference points: the 4-D entry location at slot 0 of each selected sequence ...
train_last_nonstat_loc = train_entry_loc[train_last_nonstat_indexes, 0, :] 
# ... and the matching `train_targets_inside` label of each selected sequence.
train_last_nonstat_target = train_targets_inside[train_last_nonstat_indexes]
print(train_last_nonstat_loc.shape, train_last_nonstat_target.shape)

(67065,)
(67065, 4) (67065,)


In [5]:
from scipy import spatial

# Build a KD-tree over the reference locations so that nearest-neighbour
# lookups for all query points can be answered by the tree.
reference = train_last_nonstat_loc
reference_tree = spatial.KDTree(reference)

In [6]:
# For every train entry point, fetch the QUERY_SIZE nearest reference points:
# distances and the corresponding indices into `reference`.
train_entry_query_dist, train_entry_query_indexes = reference_tree.query(train_entry_loc,k=QUERY_SIZE)
print(train_entry_query_dist.shape, train_entry_query_indexes.shape)

(134063, 21, 10) (134063, 21, 10)


In [7]:
# Same lookup for every train exit point: distances and indices into `reference`.
train_exit_query_dist, train_exit_query_indexes = reference_tree.query(train_exit_loc,k=QUERY_SIZE)
print(train_exit_query_dist.shape, train_exit_query_indexes.shape)

(134063, 21, 10) (134063, 21, 10)


In [8]:
# Same lookup for every test entry point: distances and indices into `reference`.
test_entry_query_dist, test_entry_query_indexes = reference_tree.query(test_entry_loc,k=QUERY_SIZE)
print(test_entry_query_dist.shape, test_entry_query_indexes.shape)

(33515, 21, 10) (33515, 21, 10)


In [9]:
# Same lookup for every test exit point: distances and indices into `reference`.
test_exit_query_dist, test_exit_query_indexes = reference_tree.query(test_exit_loc,k=QUERY_SIZE)
print(test_exit_query_dist.shape, test_exit_query_indexes.shape)

(33515, 21, 10) (33515, 21, 10)


In [10]:
# Translate neighbour indices into the labels of the matching reference points.
# mode='clip' clamps any out-of-range index to the last valid entry instead of raising
# (presumably to absorb the "missing neighbour" index KDTree.query can return — TODO confirm).
train_entry_query_target = train_last_nonstat_target.take(train_entry_query_indexes, mode='clip')
train_exit_query_target = train_last_nonstat_target.take(train_exit_query_indexes, mode='clip')
test_entry_query_target = train_last_nonstat_target.take(test_entry_query_indexes, mode='clip')
test_exit_query_target = train_last_nonstat_target.take(test_exit_query_indexes, mode='clip')

print(train_entry_query_target.shape, train_exit_query_target.shape)
print(test_entry_query_target.shape, test_exit_query_target.shape)

(134063, 21, 10) (134063, 21, 10)
(33515, 21, 10) (33515, 21, 10)


In [11]:
# final exit values should be nans:
# blank every neighbour feature derived from the exit point at slot 0.
for neighbour_array in (train_exit_query_target, train_exit_query_dist,
                        test_exit_query_target, test_exit_query_dist):
    neighbour_array[:, 0, :] = np.nan

# should exclude the last-seen-position:
# blank the closest neighbour (rank 0) of the train entry point at slot 0.
# NOTE(review): this is applied to every training row, although only the rows listed in
# train_last_nonstat_indexes are part of the reference and can match themselves — confirm
# that discarding neighbour 0 for the remaining rows is intended.
for neighbour_array in (train_entry_query_target, train_entry_query_dist):
    neighbour_array[:, 0, 0] = np.nan

In [12]:
# Sanity check: print the shapes of all target/distance arrays after the NaN masking.
print(train_entry_query_target.shape, train_exit_query_target.shape)
print(train_entry_query_dist.shape, train_exit_query_dist.shape)
print(test_entry_query_target.shape, test_exit_query_target.shape)
print(test_entry_query_dist.shape, test_exit_query_dist.shape)

(134063, 21, 10) (134063, 21, 10)
(134063, 21, 10) (134063, 21, 10)
(33515, 21, 10) (33515, 21, 10)
(33515, 21, 10) (33515, 21, 10)


In [13]:
# Load the previously prepared train/test frames.
# NOTE: unpickling can execute arbitrary code — only load pickles from a trusted source.
df_train = pd.read_pickle("../input/eyn-pre-unravel-df/df_train.pickle")
df_test = pd.read_pickle("../input/eyn-pre-unravel-df/df_test.pickle")
# Remember the pre-existing columns so they can be dropped once the new features are added.
df_original_columns = df_train.columns

In [14]:
# Column names for the neighbour features, one per neighbour rank 0..QUERY_SIZE-1:
#   at<i>/ad<i> -> label / distance of the i-th neighbour of the entry point
#   bt<i>/bd<i> -> label / distance of the i-th neighbour of the exit point
neighbour_ranks = range(QUERY_SIZE)
entry_col_names_target = list(map("at{}".format, neighbour_ranks))
entry_col_names_dist = list(map("ad{}".format, neighbour_ranks))
exit_col_names_target = list(map("bt{}".format, neighbour_ranks))
exit_col_names_dist = list(map("bd{}".format, neighbour_ranks))

In [15]:
# Attach the neighbour features to both frames. Each (n_seq, n_slots) slice is flattened
# so that it lines up with the frame's rows. Per neighbour rank the columns are added in
# the order: entry label, entry distance, exit label, exit distance.
feature_sources = (
    (df_train,
     train_entry_query_target, train_entry_query_dist,
     train_exit_query_target, train_exit_query_dist),
    (df_test,
     test_entry_query_target, test_entry_query_dist,
     test_exit_query_target, test_exit_query_dist),
)
rank_columns = zip(entry_col_names_target, entry_col_names_dist,
                   exit_col_names_target, exit_col_names_dist)
for rank, (entry_t_col, entry_d_col, exit_t_col, exit_d_col) in enumerate(rank_columns):
    for frame, entry_target, entry_dist, exit_target, exit_dist in feature_sources:
        frame[entry_t_col] = entry_target[:, :, rank].ravel()
        frame[entry_d_col] = entry_dist[:, :, rank].ravel()
        frame[exit_t_col] = exit_target[:, :, rank].ravel()
        frame[exit_d_col] = exit_dist[:, :, rank].ravel()

In [16]:
# reinforce nan values for nan entries:
# wherever feature column 2 (x_entry per the feature-order comment) is NaN in the raw
# array, overwrite every column of the matching frame row with NaN.
df_train.loc[np.isnan(train_data[:,:,2].ravel()), :] = np.nan
df_test.loc[np.isnan(test_data[:,:,2].ravel()), :] = np.nan

In [17]:
# Preview the first training rows, original columns plus the new neighbour features.
df_train.head(8)

Unnamed: 0_level_0,Unnamed: 1_level_0,t_entry,t_exit,x_entry,y_entry,x_exit,y_exit,vmax,vmin,vmean,tid_0,tid_1,entry_in,exit_in,dur,dur_a,dist,dist_a,speed,speed_a,dir_x,dir_y,dir_x_a,dir_y_a,at0,ad0,bt0,bd0,at1,ad1,bt1,bd1,at2,ad2,bt2,bd2,at3,ad3,bt3,bd3,at4,ad4,bt4,bd4,at5,ad5,bt5,bd5,at6,ad6,bt6,bd6,at7,ad7,bt7,bd7,at8,ad8,bt8,bd8,at9,ad9,bt9,bd9
seq,traj,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1
0,0,0.004194,0.030917,-1.595688,-0.429282,,,,,,5.0,1.0,0.0,,0.026722,0.040611,,0.03768,,0.927831,,,0.094538,0.995521,,,,,0.0,0.001438,,,0.0,0.001978,,,0.0,0.002254,,,0.0,0.002459,,,0.0,0.002761,,,0.0,0.003468,,,0.0,0.004074,,,0.0,0.004421,,,0.0,0.004642,,
0,1,-0.036417,-0.036417,-1.59925,-0.466794,-1.59925,-0.466794,,,,4.0,1.0,0.0,0.0,0.0,0.601278,0.0,0.050029,0.0,0.083205,0.0,0.0,0.200257,0.979743,0.0,0.000724,0.0,0.000724,0.0,0.001074,0.0,0.001074,0.0,0.001173,0.0,0.001173,0.0,0.001282,0.0,0.001282,0.0,0.001336,0.0,0.001336,0.0,0.001341,0.0,0.001341,0.0,0.001341,0.0,0.001341,0.0,0.001809,0.0,0.001809,0.0,0.00193,0.0,0.00193,0.0,0.002164,0.0,0.002164
0,2,-0.670278,-0.637694,-1.602144,-0.533842,-1.609269,-0.51581,,,,3.0,1.0,0.0,0.0,0.032583,0.024028,0.019388,0.008491,0.595032,0.353365,-0.367464,0.930038,0.760436,0.649413,0.0,0.000145,0.0,0.002898,0.0,0.000145,0.0,0.00658,0.0,0.000147,0.0,0.006816,0.0,0.000147,0.0,0.006963,0.0,0.000149,0.0,0.007184,0.0,0.000243,0.0,0.007247,0.0,0.000268,0.0,0.007351,0.0,0.000333,0.0,0.007609,0.0,0.000334,0.0,0.00785,0.0,0.000336,0.0,0.008035
0,3,-0.710778,-0.694306,-1.603369,-0.546593,-1.608601,-0.539355,,,,2.0,1.0,0.0,0.0,0.016472,0.046389,0.008931,0.261206,0.54217,5.630798,-0.585843,0.810425,-0.040913,0.999163,0.0,0.001014,0.0,0.000649,0.0,0.001377,0.0,0.000758,0.0,0.001447,0.0,0.00096,0.0,0.001587,0.0,0.001008,0.0,0.001615,0.0,0.001141,0.0,0.001627,0.0,0.00156,0.0,0.001855,0.0,0.001563,0.0,0.001967,0.0,0.001708,0.0,0.002183,0.0,0.00198,0.0,0.002303,0.0,0.002017
0,4,-0.765722,-0.757167,-1.696432,-0.835619,-1.592682,-0.807581,,,,1.0,1.0,0.0,0.0,0.008556,0.020056,0.107472,1.967866,12.56163,98.120718,0.965368,0.260892,-0.324647,-0.945835,0.0,0.01633,0.0,0.000426,0.0,0.022404,0.0,0.000426,0.0,0.027182,0.0,0.000426,0.0,0.029146,0.0,0.000736,0.0,0.030046,0.0,0.001119,0.0,0.030737,0.0,0.001509,0.0,0.030909,0.0,0.002791,0.0,0.03149,0.0,0.003119,0.0,0.032276,0.0,0.005444,0.0,0.032572,0.0,0.005448
0,5,-0.792472,-0.785778,-0.988774,1.44925,-1.057569,1.025657,,,,0.0,1.0,0.0,0.0,0.006694,,0.429143,,64.104337,,-0.160309,-0.987067,,,0.0,0.001168,0.0,0.004895,0.0,0.004749,0.0,0.005385,0.0,0.00655,0.0,0.006419,0.0,0.00836,0.0,0.006867,0.0,0.008456,0.0,0.006927,0.0,0.008668,0.0,0.01659,0.0,0.009664,0.0,0.016716,0.0,0.010598,0.0,0.016822,0.0,0.010867,0.0,0.017924,0.0,0.011113,0.0,0.023875
0,6,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
0,7,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [18]:
# Preview the first test rows, original columns plus the new neighbour features.
df_test.head(6)

Unnamed: 0_level_0,Unnamed: 1_level_0,t_entry,t_exit,x_entry,y_entry,x_exit,y_exit,vmax,vmin,vmean,tid_0,tid_1,entry_in,exit_in,dur,dur_a,dist,dist_a,speed,speed_a,dir_x,dir_y,dir_x_a,dir_y_a,at0,ad0,bt0,bd0,at1,ad1,bt1,bd1,at2,ad2,bt2,bd2,at3,ad3,bt3,bd3,at4,ad4,bt4,bd4,at5,ad5,bt5,bd5,at6,ad6,bt6,bd6,at7,ad7,bt7,bd7,at8,ad8,bt8,bd8,at9,ad9,bt9,bd9
seq,traj,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1
0,0,0.005889,0.017556,1.221697,0.940063,,,,,,5.0,31.0,0.0,,0.011667,0.133861,,0.002757,,0.020592,,,-0.444227,-0.895914,0.0,0.00646,,,0.0,0.006684,,,0.0,0.008206,,,0.0,0.008763,,,0.0,0.014911,,,0.0,0.017428,,,0.0,0.018382,,,0.0,0.018608,,,0.0,0.018749,,,0.0,0.019193,,
0,1,-0.157417,-0.127972,1.248303,1.254617,1.222922,0.942532,,,,4.0,31.0,0.0,0.0,0.029444,0.018944,0.313115,0.164695,10.63411,8.693565,-0.081059,-0.996709,0.988186,-0.153258,0.0,0.004577,0.0,0.004462,0.0,0.004788,0.0,0.005697,0.0,0.006291,0.0,0.005963,0.0,0.007225,0.0,0.011261,0.0,0.00846,0.0,0.012929,0.0,0.008797,0.0,0.015698,0.0,0.009841,0.0,0.015929,0.0,0.009887,0.0,0.016076,0.0,0.01183,0.0,0.016533,0.0,0.011843,0.0,0.016829
0,2,-0.242583,-0.176361,0.285834,0.254906,1.085554,1.279858,,,,3.0,31.0,1.0,0.0,0.066222,0.021389,1.30003,1.174368,19.631323,54.905527,0.615155,0.788407,-0.803733,-0.594991,1.0,0.000162,0.0,0.001893,0.0,0.00162,0.0,0.003955,1.0,0.00419,0.0,0.007061,1.0,0.004703,0.0,0.00753,0.0,0.005963,0.0,0.009792,1.0,0.006355,0.0,0.013626,0.0,0.006708,0.0,0.014199,1.0,0.00746,0.0,0.014901,0.0,0.0076,0.0,0.015777,0.0,0.00833,0.0,0.01648
0,3,-0.263972,-0.263972,1.229712,0.953644,1.229712,0.953644,0.0,0.0,0.0,2.0,31.0,0.0,0.0,0.0,0.052222,0.0,0.017715,0.0,0.339218,0.0,0.0,0.496436,0.868073,0.0,0.003539,0.0,0.003539,0.0,0.003782,0.0,0.003782,0.0,0.003999,0.0,0.003999,0.0,0.004392,0.0,0.004392,0.0,0.004566,0.0,0.004566,0.0,0.006051,0.0,0.006051,0.0,0.007894,0.0,0.007894,0.0,0.009124,0.0,0.009124,0.0,0.01003,0.0,0.01003,0.0,0.011442,0.0,0.011442
0,4,-0.327861,-0.316194,1.251197,1.406247,1.220918,0.938266,,,,0.0,31.0,0.0,0.0,0.011667,,0.468959,,40.196508,,-0.064566,-0.997913,,,0.0,0.000253,0.0,0.007049,0.0,0.00045,0.0,0.007982,0.0,0.000501,0.0,0.008195,0.0,0.003892,0.0,0.009996,0.0,0.003935,0.0,0.016512,0.0,0.007829,0.0,0.01726,0.0,0.008112,0.0,0.018897,0.0,0.008286,0.0,0.018978,0.0,0.009713,0.0,0.020278,0.0,0.011159,0.0,0.020502
0,5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [19]:
# Store the neighbour label columns as categoricals in both frames.
for frame in (df_train, df_test):
    for label_columns in (entry_col_names_target, exit_col_names_target):
        frame[label_columns] = frame[label_columns].astype('category')

# Keep only the newly created neighbour features.
df_train = df_train.drop(columns=df_original_columns)
df_test = df_test.drop(columns=df_original_columns)

In [20]:
# Preview the training frame now that only the neighbour features remain.
df_train.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,at0,ad0,bt0,bd0,at1,ad1,bt1,bd1,at2,ad2,bt2,bd2,at3,ad3,bt3,bd3,at4,ad4,bt4,bd4,at5,ad5,bt5,bd5,at6,ad6,bt6,bd6,at7,ad7,bt7,bd7,at8,ad8,bt8,bd8,at9,ad9,bt9,bd9
seq,traj,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1
0,0,,,,,0.0,0.001438,,,0.0,0.001978,,,0.0,0.002254,,,0.0,0.002459,,,0.0,0.002761,,,0.0,0.003468,,,0.0,0.004074,,,0.0,0.004421,,,0.0,0.004642,,
0,1,0.0,0.000724,0.0,0.000724,0.0,0.001074,0.0,0.001074,0.0,0.001173,0.0,0.001173,0.0,0.001282,0.0,0.001282,0.0,0.001336,0.0,0.001336,0.0,0.001341,0.0,0.001341,0.0,0.001341,0.0,0.001341,0.0,0.001809,0.0,0.001809,0.0,0.00193,0.0,0.00193,0.0,0.002164,0.0,0.002164
0,2,0.0,0.000145,0.0,0.002898,0.0,0.000145,0.0,0.00658,0.0,0.000147,0.0,0.006816,0.0,0.000147,0.0,0.006963,0.0,0.000149,0.0,0.007184,0.0,0.000243,0.0,0.007247,0.0,0.000268,0.0,0.007351,0.0,0.000333,0.0,0.007609,0.0,0.000334,0.0,0.00785,0.0,0.000336,0.0,0.008035
0,3,0.0,0.001014,0.0,0.000649,0.0,0.001377,0.0,0.000758,0.0,0.001447,0.0,0.00096,0.0,0.001587,0.0,0.001008,0.0,0.001615,0.0,0.001141,0.0,0.001627,0.0,0.00156,0.0,0.001855,0.0,0.001563,0.0,0.001967,0.0,0.001708,0.0,0.002183,0.0,0.00198,0.0,0.002303,0.0,0.002017
0,4,0.0,0.01633,0.0,0.000426,0.0,0.022404,0.0,0.000426,0.0,0.027182,0.0,0.000426,0.0,0.029146,0.0,0.000736,0.0,0.030046,0.0,0.001119,0.0,0.030737,0.0,0.001509,0.0,0.030909,0.0,0.002791,0.0,0.03149,0.0,0.003119,0.0,0.032276,0.0,0.005444,0.0,0.032572,0.0,0.005448
0,5,0.0,0.001168,0.0,0.004895,0.0,0.004749,0.0,0.005385,0.0,0.00655,0.0,0.006419,0.0,0.00836,0.0,0.006867,0.0,0.008456,0.0,0.006927,0.0,0.008668,0.0,0.01659,0.0,0.009664,0.0,0.016716,0.0,0.010598,0.0,0.016822,0.0,0.010867,0.0,0.017924,0.0,0.011113,0.0,0.023875
0,6,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
0,7,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
0,8,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
0,9,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [21]:
# Persist the neighbour-feature frames to the working directory.
df_train.to_pickle("df_train_cluster.pickle")
df_test.to_pickle("df_test_cluster.pickle")

In [22]:
# to load
# Read back the pickles written in the previous cell (rebinds df_train / df_test)
# and print their shapes as a round-trip check.
df_train = pd.read_pickle("df_train_cluster.pickle")
df_test = pd.read_pickle("df_test_cluster.pickle")
print(df_train.shape, df_test.shape)

(2815323, 40) (703815, 40)


In [23]:
# Uniqueness check: render the slot-0 neighbour-distance vector of every training
# sequence as a string, then compare the total count with the number of distinct
# strings — over all sequences and over the reference subset only.
str_list = [str([train_entry_query_dist[seq, 0, :]])
            for seq in range(len(train_entry_query_dist))]
reference_str_subset = np.array(str_list)[train_last_nonstat_indexes]

print(len(str_list))
print(len(set(str_list)))
print(len(reference_str_subset))
print(len(set(reference_str_subset)))

134063
133877
67065
67065


In [24]:
# Self-match check: the j-th reference point was taken from sequence
# train_last_nonstat_indexes[j], so that sequence's closest neighbour at slot 0
# should be reference index j. `a` collects the positions j where it is not.
closest_reference = train_entry_query_indexes[:, 0, 0][train_last_nonstat_indexes]
expected_reference = np.arange(len(train_last_nonstat_indexes))
a = np.flatnonzero(closest_reference != expected_reference)
a

array([], dtype=int64)

In [25]:
# Rank-0 neighbour index for the reference positions selected by `a`.
train_entry_query_indexes[:,0,0][train_last_nonstat_indexes][a]

array([], dtype=int64)

In [26]:
# Rank-1 neighbour index for the reference positions selected by `a`.
train_entry_query_indexes[:,0,1][train_last_nonstat_indexes][a]

array([], dtype=int64)

In [27]:
# Rank-2 neighbour index for the reference positions selected by `a`.
train_entry_query_indexes[:,0,2][train_last_nonstat_indexes][a]

array([], dtype=int64)