In [1]:
import pandas as pd
import numpy as np
import pickle

columns_probes = ['sampleID', 'dateTime', 'sourceCode', 'latitude', 'longitude', 'altitude', 'speed', 'heading']
columns_links = ['linkPVID', 'refNodeID', 'nrefNodeID', 'length', 'functionalClass', 'directionOfTravel', 'speedCategory',
                 'fromRefSpeedLimit', 'toRefSpeedLimit', 'fromRefNumLanes', 'toRefNumLanes', 'multiDigitized', 'urban',
                 'timeZone', 'shapeInfo', 'curvatureInfo', 'slopeInfo']

df_probes = pd.read_pickle('probes_pickle.pkl')
df_links = pd.read_pickle('links_pickle.pkl')


In [2]:
# probe_coord = [[df_probes['latitude'][i], df_probes['longitude'][i]] for i in range(len(df_probes))]
# with open('probe_coord.pkl', 'wb') as f:
#     pickle.dump(probe_coord, f)
with open('probe_coord.pkl', 'rb') as f:
    probe_coord = pickle.load(f)
min_probe = np.min(probe_coord, axis=0)
max_probe = np.max(probe_coord, axis=0)
norm_probe_coord = (probe_coord - min_probe) / (max_probe - min_probe)

In [3]:
link_shape = df_links['shapeInfo'].str.split('|')
ref_coord = [link_shape[i][0].split('/') for i in range(len(link_shape))]
nonref_coord = [link_shape[i][-1].split('/') for i in range(len(link_shape))]

In [4]:
for i in range(len(ref_coord)):
    for j in range(3):
        if ref_coord[i][j] == '':
            ref_coord[i][j] = '0'
ref_coord = list(map(lambda sl: list(map(float, sl)), ref_coord))
min_ref = np.min(ref_coord, axis=0)
max_ref = np.max(ref_coord, axis=0)
norm_ref_coord = (ref_coord - min_ref) / (max_ref - min_ref)

for i in range(len(nonref_coord)):
    for j in range(3):
        if nonref_coord[i][j] == '':
            nonref_coord[i][j] = '0'
nonref_coord = list(map(lambda sl: list(map(float, sl)), nonref_coord))
min_nonref = np.min(nonref_coord, axis=0)
max_nonref = np.max(nonref_coord, axis=0)
norm_nonref_coord = (nonref_coord - min_nonref) / (max_nonref - min_nonref)


In [5]:
def dist(point_1, point_2):
    # Using Euclidean distance since the points to be calculated
    # are close enough to consider them existing on a 2-D plane
    return np.srqt((point_1[0] - point_2[0]) ** 2 + (point_1[1] - point_2[1]) ** 2)

def get_perp_dist(point, ref, non_ref):

    if (non_ref[0] - ref[0]) == 0:
        d = abs(point[0] - ref[0])
    else:
        m = (non_ref[1] - ref[1]) / (non_ref[0] - ref[0])
        A = -m
        C = m * ref[0] - ref[1]
        d = abs(A * point[0] + point[1] + C)
        d /= np.sqrt(m ** 2 + 1)
    return d

def get_perp_dist_list(point, refs, non_refs):
    dist_list = []
    for i in range(len(refs)):
        dist_list.append(get_perp_dist(point, refs[i], non_refs[i]))
    return np.array(dist_list)

In [6]:
# # Combining everything for probe matching for all probe points
# N = 10

# matched_links_indices = []

# # Getting perpendicular distance from all links for 1 probe point
# orig_dist_list = get_perp_dist_list(norm_probe_coord[0], norm_ref_coord, norm_nonref_coord)
# # Getting the index of the closest N perpendicular distances for 1 probe point
# fake_dist_list = orig_dist_list.copy()
# closest_n_ind = []
# for i in range(N):
#     closest_n_ind.append(fake_dist_list.argmin())
#     fake_dist_list[closest_n_ind[i]] = np.inf
# print(closest_n_ind)

# for p_index in range(0, 64):  # len(df_probes[:])):

#     # Second step of updating the distances for selection for 1 probe point.
#     # Bearing calcualtion for 1 link.
#     headings = []
#     updated_dist_list = []
#     for close_index in closest_n_ind:
#         ref = ref_coord[close_index]
#         non_ref = nonref_coord[close_index]
        
#         updated_dist_list.append(get_perp_dist(norm_probe_coord[p_index], norm_ref_coord[close_index], norm_nonref_coord[close_index]))

#         # BEARING CALCULATIONS
#         y = np.cos(non_ref[0]) * np.sin(non_ref[1] - ref[1])
#         x = np.cos(ref[0]) * np.sin(non_ref[0]) - np.sin(ref[0]) * np.cos(non_ref[0]) * np.cos(non_ref[1] - ref[1])

#         bearing = np.degrees(np.arctan2(y, x))
#         if df_links['directionOfTravel'][close_index] == 'T':
#             bearing = -bearing
#         if bearing < 0:
#             bearing += 360

#         heading_factor = abs(df_probes['heading'][p_index] - bearing) / 720

#         headings.append(heading_factor)

#     headings = np.array(headings)
    
#     updated_dist_list /= updated_dist_list[-1]

#     # print(updated_dist_list)
#     # Creating the selection list
#     selection_list = (updated_dist_list + headings) / 2
    
#     # print(selection_list)

#     selected_probe_index = closest_n_ind[selection_list.argmin()]
#     print(selected_probe_index)

#     matched_links_indices.append(selected_probe_index)

# # print(matched_links_indices)

In [13]:
# Combining everything for probe matching for all probe points
N = 10

matched_links_indices = []
p_index = 0

for sampleID in df_probes['sampleID'].unique():

    # Getting perpendicular distance from all links for 1 probe point
    orig_dist_list = get_perp_dist_list(norm_probe_coord[p_index], norm_ref_coord, norm_nonref_coord)

    # Getting the index of the closest N perpendicular distances for 1 probe point
    fake_dist_list = orig_dist_list.copy()
    closest_n_ind = []
    for i in range(N):
        closest_n_ind.append(fake_dist_list.argmin())
        fake_dist_list[closest_n_ind[i]] = np.inf
    # print(closest_n_ind)

    print(sampleID, p_index)

    while (df_probes['sampleID'][p_index] == sampleID):

        # Second step of updating the distances for selection for 1 probe point.
        # Bearing calcualtion for 1 link.
        headings = []
        updated_dist_list = []
        for close_index in closest_n_ind:
            ref = ref_coord[close_index]
            non_ref = nonref_coord[close_index]
            
            updated_dist_list.append(get_perp_dist(norm_probe_coord[p_index], norm_ref_coord[close_index], norm_nonref_coord[close_index]))

            # BEARING CALCULATIONS
            y = np.cos(non_ref[0]) * np.sin(non_ref[1] - ref[1])
            x = np.cos(ref[0]) * np.sin(non_ref[0]) - np.sin(ref[0]) * np.cos(non_ref[0]) * np.cos(non_ref[1] - ref[1])

            bearing = np.degrees(np.arctan2(y, x))
            if df_links['directionOfTravel'][close_index] == 'T':
                bearing = -bearing
            if bearing < 0:
                bearing += 360

            heading_factor = abs(df_probes['heading'][p_index] - bearing) / 720

            headings.append(heading_factor)

        headings = np.array(headings)
        
        updated_dist_list /= updated_dist_list[-1]

        # print(updated_dist_list)
        # Creating the selection list
        selection_list = (updated_dist_list + headings) / 2
        
        # print(selection_list)

        selected_probe_index = closest_n_ind[selection_list.argmin()]
        # print(selected_probe_index)

        matched_links_indices.append(selected_probe_index)

        p_index += 1

# print(matched_links_indices)

3496 0
4552 63
4553 125
4554 187
4555 249
4556 311
4557 373
4558 435
4559 497
4560 559
4561 633
4562 695
4563 757
5583 819
5734 881
5735 892
5736 954
6212 965
6213 1027
6214 1089
6215 1151
6216 1213
6217 1275
6218 1337
6219 1399
6220 1461
6221 1523
6222 1585
6223 1647
7353 1709
7354 1771
7355 1832
7356 1894
7357 1956
7358 2018
7359 2080
7360 2142
7361 2204
7362 2266
7363 2328
7364 2390
7365 2452
7366 2526
7369 2588
7885 2650
7886 2717
7887 2779
8366 2841
8367 2903
8368 2965
8369 3026
9380 3088
9381 3103
9382 3159
9383 3221
9384 3283
9385 3345
9386 3407
9387 3469
9388 3531
9389 3593
9390 3655
9391 3715
9392 3772
10537 3814
13431 3876
13432 3938
13433 4000
13434 4062
13435 4124
13436 4186
13437 4248
13438 4310
13439 4372
13440 4434
14025 4496
14026 4558
14815 4620
14816 4682
14817 4744
14818 4806
14819 4873
14820 4935
14821 4997
14822 5059
14823 5121
14824 5183
14825 5245
14826 5307
14827 5369
14828 5431
14829 5493
14830 5555
14831 5617
14832 5679
14833 5741
14834 5803
14835 5865
14836 5

KeyboardInterrupt: 

In [23]:
matched_points_df = df_probes.copy()
matched_points_df['linkPVID'] = df_links['linkPVID'][matched_links_indices].reset_index(drop=True)
mathed_points_df.to_pickle('matched_points.pkl')

NameError: name 'mathed_points_df' is not defined

In [None]:
df_matched_points = pd.read_pickle('matched_points.pkl')
print(df_matched_points)