In [4]:
import scipy.io as sio
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import matplotlib
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split
import pickle
import os
import time
from sklearn.manifold import SpectralEmbedding
from sklearn.gaussian_process import GaussianProcessRegressor

In [5]:
datapath_base = "/data/yutaro/IROS/"
datapath_big = "/data/yutaro/IROS/sim_data_full_v13_d4_m1.mat"
datapath_small = "/data/yutaro/IROS/sim_data_partial_v13_d4_m1.mat"

In [6]:
#datapath_big = "/data/yutaro/IROS/sim_data_full_v11_d4_m1.mat"
#datapath_small = "/data/yutaro/IROS/sim_data_partial_v111_d4_m1.mat"

big = sio.loadmat(datapath_big)
print(big['D'].shape)
print(pd.DataFrame(big['D']).head())

small = sio.loadmat(datapath_small)
print(small['D'].shape)
print(pd.DataFrame(small['D']).head())

(411151, 10)
          0           1          2          3    4    5         6           7  \
0 -0.220820  117.741577  15.969999  15.969999 -1.0 -1.0 -0.200762  117.745787   
1 -0.200762  117.745787  15.939999  15.939999 -1.0 -1.0 -0.184196  117.750466   
2 -0.184196  117.750466  15.909998  15.909998 -1.0 -1.0 -0.169507  117.755398   
3 -0.169507  117.755398  15.879997  15.879997 -1.0 -1.0 -0.156517  117.760427   
4 -0.156517  117.760427  15.849998  15.849998 -1.0 -1.0 -0.144677  117.765442   

           8          9  
0  15.939999  15.939999  
1  15.909998  15.909998  
2  15.879997  15.879997  
3  15.849998  15.849998  
4  15.819997  15.819997  
(6544, 10)
          0           1          2          3    4    5         6           7  \
0 -0.059364  114.428863  15.969999  15.969999 -1.0 -1.0 -0.083693  114.398494   
1 -0.083693  114.398494  15.939999  15.939999 -1.0 -1.0 -0.121254  114.377551   
2 -0.121254  114.377551  15.909998  15.909998 -1.0 -1.0 -0.137358  114.368394   
3 -0.1373

In [9]:
% time
nearest_neighbor(small['D'], small['D'])

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 6.91 µs


(array([0., 0., 0., ..., 0., 0., 0.]),
 array([    0,     1,     2, ..., 83885, 83886, 83887]))

In [8]:
small['D'].shape

(83888, 10)

In [27]:
np.mean(small['D'], axis=0).shape

(10,)

In [118]:
big['D'][0,6:10]

array([ -0.2007618 , 117.74578691,  15.93999863,  15.93999863])

### First, divide the dataset into 4 (each set corresponds to each action)

In [119]:
def condition_data_on_action(data):
    dict = {'left': [], 'left_down': [], 'down': [], 'right_down': [], 'right': [], 'right_up': [], 'up': [], 'left_up': []}

    for i in range(data['D'].shape[0]):
        if data['D'][i, 4] == -1 and data['D'][i, 5] == 0:
            dict['left'].append(np.concatenate([data['D'][i,:4],data['D'][i,6:10]], axis=0)) 
        elif data['D'][i, 4] == -1 and data['D'][i, 5] == -1:
            dict['left_down'].append(np.concatenate([data['D'][i,:4], data['D'][i,6:10]], axis=0))
        elif data['D'][i, 4] == 0 and data['D'][i, 5] == -1:
            dict['down'].append(np.concatenate([data['D'][i,:4], data['D'][i,6:10]], axis=0))
        elif data['D'][i, 4] == 1 and data['D'][i, 5] == -1:
            dict['right_down'].append(np.concatenate([data['D'][i,:4], data['D'][i,6:10]], axis=0))
        elif data['D'][i, 4] == 1 and data['D'][i, 5] == 0:
            dict['right'].append(np.concatenate([data['D'][i,:4], data['D'][i,6:10]], axis=0))
        elif data['D'][i, 4] == 1 and data['D'][i, 5] == 1:
            dict['right_up'].append(np.concatenate([data['D'][i,:4], data['D'][i,6:10]], axis=0))
        elif data['D'][i, 4] == 0 and data['D'][i, 5] == 1:
            dict['up'].append(np.concatenate([data['D'][i,:4], data['D'][i,6:10]], axis=0))
        elif data['D'][i, 4] == -1 and data['D'][i, 5] == 1:
            dict['left_up'].append(np.concatenate([data['D'][i,:4], data['D'][i,6:10]], axis=0))
    return dict

In [120]:
big_dict = condition_data_on_action(big)
small_dict = condition_data_on_action(small)

In [121]:
# sanity check
valid_big_dim = 0
for key, val in big_dict.items():
    print(key)
    print(np.array(val).shape)
    valid_big_dim += np.array(val).shape[0]
print(valid_big_dim)
print(big['D'].shape)

left_down
(74990, 8)
left_up
(41628, 8)
right_down
(45653, 8)
down
(41646, 8)
left
(41681, 8)
right
(43339, 8)
up
(41254, 8)
right_up
(80960, 8)
411151
(411151, 10)


In [122]:
# sanity check
valid_small_dim = 0
for key, val in small_dict.items():
    print(key)
    print(np.array(val).shape)
    valid_small_dim += np.array(val).shape[0]
print(valid_small_dim)
print(small['D'].shape)

left_down
(1026, 8)
left_up
(285, 8)
right_down
(829, 8)
down
(589, 8)
left
(245, 8)
right
(579, 8)
up
(1835, 8)
right_up
(1156, 8)
6544
(6544, 10)


In [9]:
np.histogram(big['D'][:, 4]) 

(array([158299,      0,      0,      0,      0,  82900,      0,      0,
             0, 169952]),
 array([-1. , -0.8, -0.6, -0.4, -0.2,  0. ,  0.2,  0.4,  0.6,  0.8,  1. ]))

In [6]:
pd.DataFrame(small['D']).head() 

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,-0.059364,114.428863,15.969999,15.969999,-1.0,-1.0,-0.083693,114.398494,15.939999,15.939999
1,-0.083693,114.398494,15.939999,15.939999,-1.0,-1.0,-0.121254,114.377551,15.909998,15.909998
2,-0.121254,114.377551,15.909998,15.909998,-1.0,-1.0,-0.137358,114.368394,15.879997,15.879997
3,-0.137358,114.368394,15.879997,15.879997,-1.0,-1.0,-0.153154,114.366211,15.849998,15.849998
4,-0.153154,114.366211,15.849998,15.849998,-1.0,-1.0,-0.167177,114.388905,15.819997,15.819997


In [181]:
def find_linear_transformation(A, B, tolerance=1e-6):
    # A = n by m
    # B = n by m
    # Find T \in R^{m by m} s.t. T a_i = b_i for d(a_i, b_i) is small for i=1...n
    src, dst = A, B
    n = A.shape[0]
    max_iter = 10
    prev_error = 0
    for i in range(max_iter):
        # find the nearest neighbors between the current source and destination points
        #print(src.shape, dst.shape)
        distances, indices = nearest_neighbor(src, dst)
        # compute the transformation between the current source and nearest destination points
        T = best_fit_linear_transform(src, dst[indices, :])
        # update the current source 
        src = np.dot(src, T)  
        # check error
        mean_error = np.mean(distances)
        if np.abs(prev_error - mean_error) < tolerance:
            break
        prev_error = mean_error
    # calculate final transformation
    T = best_fit_linear_transform(A, src)
    return T, distances 

In [81]:
def best_fit_linear_transform(A, B):
    T_est = np.linalg.pinv(A).dot(B)
    return T_est

In [125]:
def test_fitting_linear_transform():
    m = 4
    N = 100  # num samples

    rng = np.random.RandomState(42)

    W = rng.randn(m, 8)
    X = rng.randn(N, m)
    Z_clean = X.dot(W)

    Z = Z_clean + rng.randn(*Z_clean.shape) * .001

    W_est = np.linalg.pinv(X).dot(Z)

    print("W: {}".format(W))
    print("W_est: {}".format(W_est))

In [126]:
test_fitting_linear_transform()

W: [[ 0.49671415 -0.1382643   0.64768854  1.52302986 -0.23415337 -0.23413696
   1.57921282  0.76743473]
 [-0.46947439  0.54256004 -0.46341769 -0.46572975  0.24196227 -1.91328024
  -1.72491783 -0.56228753]
 [-1.01283112  0.31424733 -0.90802408 -1.4123037   1.46564877 -0.2257763
   0.0675282  -1.42474819]
 [-0.54438272  0.11092259 -1.15099358  0.37569802 -0.60063869 -0.29169375
  -0.60170661  1.85227818]]
W_est: [[ 0.49684783 -0.13834211  0.64769879  1.52306484 -0.23416831 -0.23406561
   1.57917356  0.76728175]
 [-0.4695567   0.54279454 -0.46335647 -0.46562165  0.24189757 -1.91325259
  -1.72474676 -0.56208957]
 [-1.01282548  0.31422305 -0.90797028 -1.41228736  1.46571876 -0.22583531
   0.06749571 -1.424704  ]
 [-0.54443889  0.11062472 -1.15109093  0.37560542 -0.60059882 -0.29153448
  -0.60179033  1.85226932]]


In [112]:
# Step 1: Normalize the training data
def compute_normalization_parameters(data):
    """
    Compute normalization parameters (min, max per feature)
    :param data: matrix with data organized by rows [num_samples x num_features]
    :return: min and max per feautre as row matrices of dimension [1 x num_variables]
    """
    min_param = np.min(data, axis=0)
    max_param = np.max(data, axis=0)
    return np.expand_dims(min_param, 0), np.expand_dims(max_param, 0)

def normalize_data_per_row(data, min_param, max_param):
    """
    Normalize a given matrix of data (samples must be organized per row)
    :param data: input data
    :param min_param: min (for each feature) for normalization
    :param max_param: max (for each feature) for normalization
    :return: normalized data, (data - min_param) / max_param - min_param
    """
    # sanity checks!
    assert len(data.shape) == 2, "Expected the input data to be a 2D matrix"
    assert data.shape[1] == min_param.shape[1], "Data - min_param size mismatch ({} vs {})".format(data.shape[1], min_param.shape[1])
    assert data.shape[1] == max_param.shape[1], "Data - max_param size mismatch ({} vs {})".format(data.shape[1], max_param.shape[1])

    # TODO. Complete. Replace the line below with code to whitten the data.
    normalized_data = np.divide(data - min_param, max_param - min_param)
    mean = np.mean(normalized_data, axis=0)
    mean_centered_data = normalized_data - np.expand_dims(mean, 0)
    return mean_centered_data

In [127]:
def prepare_data(data):
    min_param, max_param = compute_normalization_parameters(data)
    mean_centered_normalized_data = normalize_data_per_row(data[:,:], min_param, max_param)
    return mean_centered_normalized_data

In [194]:
def main():
    # for each dataset (conditioned on action), find the best linear transformation
    # Note that each dataset is mean-centered so I don't need to take care of the translation
    np.random.seed(123)
    T_dict = {}
    for g, gg in zip(big_dict.items(), small_dict.items()):
        key_big, val_big = g
        key_small, val_small = gg
        data_big = np.array(big_dict[key_big])
        print(key_big, data_big.shape)
        data_small = np.array(small_dict[key_small])
        print(key_small, data_small.shape)
        data_big = prepare_data(data_big)
        data_small = prepare_data(data_small)
        print("should be zero: {}".format(np.mean(data_big, axis=0)))
        # First partition the big data randomly so that the number of points matches with the small data
        # Then feed the two into find_linear_transformation()
        # Make sure that I normalized the data first.
        random_indices = np.random.permutation(data_big.shape[0])
        start = 0
        n_count = 0
        batch_size = data_small.shape[0]
        T_list, dist_list = [], []
        while n_count < int(data_big.shape[0] / batch_size)-1:
            T, distances = find_linear_transformation(data_big[start:(start+batch_size), :], data_small)
            start = start+batch_size+1
            T_list.append(T)
            dist_list.append(distances)
            n_count += 1 
        T_list = reject_outliers(T_list, m=2)
        T_mean = np.zeros(T_list[0].shape)
        for i in range(len(T_list)):
            T_mean += T_list[i]
        T_mean /= len(T_list)
            
        T_dict[key_big] = T_mean #, dist_list
    return T_dict

2.0

In [173]:
T_list_new = reject_outliers(T_list, m=2)

In [174]:
print(len(T_list_new))

62


In [171]:
def reject_outliers(T_list, m = 2.):
    norm_arr = np.zeros((len(T_list),))
    for i in range(len(T_list)):
        norm_arr[i] = np.linalg.norm(T_list[i])
    d = np.abs(norm_arr - np.median(norm_arr))
    mdev = np.median(d)
    s = d/mdev if mdev else 0.
    selected_indics = np.where(norm_arr[s<m])[0]
    T_list_new = []
    for idx in selected_indics:
        T_list_new.append(T_list[idx])
    return T_list_new

In [None]:
def reject_outliers(data, m=2):
    return data[abs(data - np.mean(data)) < m * np.std(data)]

In [195]:
T_dict = main() 

left_down (74990, 8)
left_down (1026, 8)
should be zero: [-1.33895799e-15  7.63428822e-15 -6.19426870e-15 -6.11721635e-15
 -1.97380450e-15 -2.49142693e-15  9.99695355e-15  1.20831383e-14]
left_up (41628, 8)
left_up (285, 8)
should be zero: [ 9.46471263e-16  1.79726620e-15  1.28308796e-14  4.00287807e-14
 -5.42328532e-16 -1.53501108e-15  7.19520292e-15 -4.66411152e-15]
right_down (45653, 8)
right_down (829, 8)
should be zero: [-6.03369602e-16 -1.71890627e-15 -3.92717107e-15 -6.06096401e-15
  7.79873492e-15 -1.45991665e-15  1.25341779e-15  2.61505515e-15]
down (41646, 8)
down (589, 8)
should be zero: [ 3.42759432e-15 -1.99726312e-15  1.05184941e-14 -3.82475578e-15
  3.68881370e-15  6.27308266e-16 -2.88399132e-16  1.63686385e-15]
left (41681, 8)
left (245, 8)
should be zero: [-1.55843818e-15 -2.01253727e-15 -3.96116100e-15  3.56947237e-14
 -1.87085005e-15 -4.18869799e-15 -9.51937467e-16  3.60133605e-14]
right (43339, 8)
right (579, 8)
should be zero: [ 1.75858364e-15  3.98331032e-15 -4.63

In [211]:
transformed_data_dic = {}
for g, gg in zip(T_dict.items(), big_dict.items()):
    key_T, val_T = g
    key_big, val_big = gg
    print(np.array(val_big).shape)
    print(val_T.shape)
    val_big = prepare_data(np.array(val_big))
    transformed_data_dic[key_big] = np.dot(val_big, val_T)

(74990, 8)
(8, 8)
(41628, 8)
(8, 8)
(45653, 8)
(8, 8)
(41646, 8)
(8, 8)
(41681, 8)
(8, 8)
(43339, 8)
(8, 8)
(41254, 8)
(8, 8)
(80960, 8)
(8, 8)


In [216]:
import pickle
with open('transformed_big_data.pkl', 'wb') as f:
    pickle.dump(transformed_data_dic, f)

In [None]:
import pickle
with open('big_data.pkl', 'wb') as f:
    pickle.dump(transformed_data_dic, f)

In [208]:
for key, val in transformed_data_dic.items(): 
    print(key)

left_down
left_up
down
right_down
left
right
up
right_up


In [212]:
pd.DataFrame(transformed_data_dic['left_down']).head()   

Unnamed: 0,0,1,2,3,4,5,6,7
0,-0.0129,0.247513,-0.053998,-0.04877,-0.012709,0.248201,-0.053916,-0.048653
1,-0.013087,0.24772,-0.054207,-0.04885,-0.012895,0.248409,-0.054125,-0.048732
2,-0.013315,0.248069,-0.054489,-0.04897,-0.013122,0.248759,-0.054407,-0.048851
3,-0.013437,0.248286,-0.054551,-0.048964,-0.013243,0.248977,-0.054469,-0.048845
4,-0.013816,0.248783,-0.055124,-0.049253,-0.013622,0.249475,-0.055042,-0.049134


In [215]:
pd.DataFrame(prepare_data(np.array(small_dict['left_down']))).head()

Unnamed: 0,0,1,2,3,4,5,6,7
0,-0.038787,0.402154,-0.085201,-0.058061,-0.03892,0.402362,-0.085322,-0.058078
1,-0.038985,0.401507,-0.086395,-0.059353,-0.039226,0.401914,-0.086517,-0.05937
2,-0.03929,0.40106,-0.087588,-0.060645,-0.039357,0.401719,-0.087711,-0.060662
3,-0.039421,0.400865,-0.088781,-0.061937,-0.039486,0.401672,-0.088906,-0.061954
4,-0.03955,0.400818,-0.089975,-0.063229,-0.0396,0.402157,-0.090101,-0.063246


In [157]:
print(len(T_list))

73


In [156]:
print(np.mean(dist_list[1]))

0.11564688245775918


In [158]:
T_list[0].astype(int)

array([[ 56,   8,  45,  -3,  56,   8,  45,  -3],
       [ 27,  61,  -6, -35,  27,  62,  -6, -35],
       [-86, -39, -64,   7, -85, -39, -64,   7],
       [ 83,  53,  39, -34,  83,  53,  39, -34],
       [-53,  -3, -44,   2, -54,  -3, -44,   2],
       [-27, -60,   6,  35, -27, -60,   6,  35],
       [ 84,  30,  64,  -6,  84,  30,  64,  -6],
       [-82, -45, -38,  34, -82, -45, -38,  34]])

In [159]:
T_list[1].astype(int)

array([[  22,  -74,   -9,  -39,   22,  -74,   -9,  -39],
       [  22,  116,  -26,  -49,   22,  116,  -26,  -49],
       [ -98, -144,   -4,   88,  -98, -145,   -4,   88],
       [ 100,  135,    3,  -92,  100,  135,    4,  -92],
       [ -20,   81,   10,   39,  -21,   81,   10,   40],
       [ -22, -113,   26,   49,  -22, -113,   26,   49],
       [  98,  133,    4,  -89,   98,  133,    4,  -89],
       [-100, -125,   -3,   93, -100, -125,   -3,   93]])

In [149]:
dist_list[0].shape

(1026,)

In [160]:
relative_err_test = [] 
for i in range(len(T_list)-1):
    relative_err_test.append(np.linalg.norm(T_list[i] - T_list[i+1]) / np.linalg.norm(T_list[i]))
    print(np.linalg.norm(T_list[i]))
print(np.array(relative_err_test).mean())

378.1931711608596
626.2498990633692
409.05100050654016
504.85272817436305
647.4885426198456
702.0825044196896
389.94839021392835
104.21903990550652
300.36183576076826
1121.5953784968403
636.0193897689768
537.0126916049593
1125.3410021794934
723.4008308048186
693.1159767545138
172.10715286407472
262.6677724088486
462.2684985168057
193.0577610631587
681.5549015756867
551.5672459796511
552.7202267146204
261.82220988608975
718.7443329619549
672.8507367468026
294.09966120492777
555.9962214205436
543.8115273125966
298.07946861729414
1648.5617445432495
1135.1266640717317
612.545256723554
90513.41104462631
518.9153820286263
793.6929647990164
514.7461743137429
343.8896837926791
454.29694379303504
1139.2625811004561
519.0096682486383
2759005.266374381
336.9980730861339
311.22821158394896
1349.6391530064207
2240.632647666877
419.75648850822273
380.32908735317267
377.41524823055863
407.45759970360285
344.792652284508
352.3908590109823
186.15547936093094
286.8633732260509
190.74846255996596
317.094

In [135]:
relative_err_test

[1.166134188408563,
 1.3150483639629322,
 1.7198805816856744,
 1.6794788721971006,
 1.2402865925053344,
 1.2330335143056672,
 0.9401510446575505,
 3.10896546114857,
 4.1149543951468255,
 1.2266407427124184,
 1.0113425865480716,
 2.016089427561995,
 1.104972461779321,
 1.4267630764899173,
 0.9348778239472214,
 1.4535353758360978,
 2.409782930948377,
 0.7911820075682531,
 2.3076157288742487,
 1.0422701929169131,
 1.0215675527237653,
 1.1632697452844405,
 2.6926145173754445,
 1.5993589416969367,
 0.8720846476667173,
 1.7106055958855346,
 1.3997850874621702,
 1.3917438214742266,
 5.406318081782878,
 0.8438614659816267,
 0.9953719098514509,
 153.68097924797118,
 0.9986175403190596,
 1.459819083595498,
 1.0775264603921813,
 1.0038961956051642,
 2.0731277452648134,
 2.3969707421796382,
 1.2155703691787105,
 4667.141289698619,
 1.0000329400124328,
 1.1437400447616766,
 4.008155413040423,
 1.9313551396256758,
 1.0096310183803896,
 0.8806437480723049,
 1.2669532137936166,
 1.4259663753034224,
 1

In [133]:
np.linalg.norm(T_list[24])

710.2483991819145

In [99]:
print(relative_err_test[24])

24.34825539363365


In [134]:
print(T_list[0] - T_list[1])

[[  28.98295454   77.27249858   59.9571126    44.33328055   29.23664056
    77.46964841   60.48553454   44.60658338]
 [   9.30080477  -50.68580965   24.92934778   17.26360729    9.63789868
   -50.44686135   24.96265005   17.31311699]
 [  15.21337667  106.19203982  -54.79303955  -77.52089264   14.95889063
   106.39974797  -55.06395572  -77.71846148]
 [ -18.42649537  -84.52687772   32.4454192    54.42465661  -18.09501957
   -84.5042169    32.38477732   54.42205497]
 [ -28.42355841  -78.40117528  -60.29656315  -45.47899289  -28.67441463
   -78.59421963  -60.8332148   -45.75356096]
 [  -9.15087832   50.41428142  -24.63385298  -17.0596753    -9.48759133
    50.17573161  -24.66730104  -17.10952413]
 [ -16.22847466 -104.11833363   55.32169766   79.49455416  -15.97988427
  -104.33379844   55.60774912   79.69538041]
 [  19.55480435   82.82168157  -32.81833066  -56.25797811   19.22689848
    82.80564657  -32.7696322   -56.25727578]]


In [12]:
def best_fit_transform(A, B):
    '''
    Calculates the least-squares best-fit transform that maps corresponding points A to B in m spatial dimensions
    Input:
      A: Nxm numpy array of corresponding points
      B: Nxm numpy array of corresponding points
    Returns:
      T: (m+1)x(m+1) homogeneous transformation matrix that maps A on to B
      R: mxm rotation matrix
      t: mx1 translation vector
    '''
    assert A.shape == B.shape

    # get number of dimensions
    m = A.shape[1]

    # translate points to their centroids = mean centered
    centroid_A = np.mean(A, axis=0) # centroid_A.shape = (m,)
    centroid_B = np.mean(B, axis=0)
    AA = A - centroid_A
    BB = B - centroid_B

    # rotation matrix
    H = np.dot(AA.T, BB)
    U, S, Vt = np.linalg.svd(H) 
    R = np.dot(Vt.T, U.T)

    # special reflection case
    if np.linalg.det(R) < 0:
       Vt[m-1,:] *= -1
       R = np.dot(Vt.T, U.T)

    # translation
    t = centroid_B.T - np.dot(R,centroid_A.T)

    # homogeneous transformation
    T = np.identity(m+1)
    T[:m, :m] = R
    T[:m, m] = t

    return T, R, t

In [55]:
def nearest_neighbor(src, dst):
    '''
    Find the nearest (Euclidean) neighbor in dst for each point in src
    Input:
        src: Nxm array of points
        dst: Nxm array of points
    Output:
        distances: Euclidean distances of the nearest neighbor
        indices: dst indices of the nearest neighbor
    '''

    assert src.shape == dst.shape

    neigh = NearestNeighbors(n_neighbors=1)
    neigh.fit(dst)
    distances, indices = neigh.kneighbors(src, return_distance=True)
    return distances.ravel(), indices.ravel()

In [10]:
def icp(A, B, init_pose=None, max_iterations=20, tolerance=0.001):
    '''
    The Iterative Closest Point method: finds best-fit transform that maps points A on to points B
    Input:
        A: Nxm numpy array of source mD points
        B: Nxm numpy array of destination mD point
        init_pose: (m+1)x(m+1) homogeneous transformation
        max_iterations: exit algorithm after max_iterations
        tolerance: convergence criteria
    Output:
        T: final homogeneous transformation that maps A on to B
        distances: Euclidean distances (errors) of the nearest neighbor
        i: number of iterations to converge
    '''

    assert A.shape == B.shape

    # get number of dimensions
    m = A.shape[1]

    # make points homogeneous, copy them to maintain the originals
    src = np.ones((m+1,A.shape[0]))
    dst = np.ones((m+1,B.shape[0]))
    src[:m,:] = np.copy(A.T)
    dst[:m,:] = np.copy(B.T)

    # apply the initial pose estimation
    if init_pose is not None:
        src = np.dot(init_pose, src)

    prev_error = 0

    for i in range(max_iterations):
        # find the nearest neighbors between the current source and destination points
        distances, indices = nearest_neighbor(src[:m,:].T, dst[:m,:].T)

        # compute the transformation between the current source and nearest destination points
        T,_,_ = best_fit_transform(src[:m,:].T, dst[:m,indices].T)

        # update the current source 
        src = np.dot(T, src)

        # check error
        mean_error = np.mean(distances)
        if np.abs(prev_error - mean_error) < tolerance:
            break
        prev_error = mean_error

    # calculate final transformation
    T,_,_ = best_fit_transform(A, src[:m,:].T)

    return T, distances, i

In [19]:
reduced_n = small['D'].shape[0]

In [23]:
print(big['D'][:reduced_n, :].shape, small['D'].shape)

(83888, 10) (83888, 10)


In [24]:
# Run ICP
start = time.time()
T, distances, iterations = icp(big['D'][:reduced_n, :], small['D'], tolerance=0.000001)
print(time.time() - start)

107.83271026611328


In [18]:
iterations

0

In [26]:
T.shape

(11, 11)

In [29]:
big['D'][:reduced_n, :].shape[0]

83888

In [30]:
m = big['D'][:reduced_n, :].shape[1]
C = np.ones([big['D'][:reduced_n, :].shape[0],m+1])
C[:,0:m] = np.copy(big['D'][:reduced_n, :])
C = np.dot(T, C.T).T


# big['D'][reduced_n:(reduced_n+1), :]

In [34]:
pd.DataFrame(big['D'][:reduced_n, :]).head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.426909,118.443377,16.0,16.0,0.0,0.0,0.418931,118.442513,16.030001,16.030001
1,0.418931,118.442513,16.030001,16.030001,1.0,1.0,0.404663,118.443087,16.030001,16.030001
2,0.404663,118.443087,16.030001,16.030001,1.0,1.0,0.396586,118.443348,16.060001,16.060001
3,0.396586,118.443348,16.060001,16.060001,1.0,1.0,0.391399,118.442141,16.060001,16.060001
4,0.391399,118.442141,16.060001,16.060001,1.0,1.0,0.384889,118.43922,16.090002,16.090002


In [33]:
pd.DataFrame(C).head() 

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,0.76011,0.260586,0.300565,54.74464,-14.518222,6.235257,0.717201,0.24892,0.332193,55.7244,1.0
1,-0.255656,0.135269,0.271478,54.644751,-14.559483,6.299387,-0.237627,0.143635,0.310243,55.845564,1.0
2,-0.236395,0.144111,0.272911,54.654161,-14.534284,6.316442,-0.258423,0.139153,0.306494,55.857271,1.0
3,-0.256323,0.138483,0.267491,54.659783,-14.508554,6.332363,-0.239094,0.14689,0.310122,55.863741,1.0
4,-0.237107,0.14421,0.271107,54.665055,-14.48166,6.346394,-0.25976,0.139293,0.308153,55.868992,1.0
