In [5]:
from math import radians, cos, sin, asin, sqrt

import folium 
import pandas as pd
import numpy as np

In [6]:
df = pd.read_csv("../traceset/topology.csv", index_col=False)

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13269 entries, 0 to 13268
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   bs      13269 non-null  int64  
 1   lon     13269 non-null  float64
 2   lat     13269 non-null  float64
dtypes: float64(2), int64(1)
memory usage: 311.1 KB


In [8]:
df.sample(5)

Unnamed: 0,bs,lon,lat
944,945,111.136232,13.163152
12489,12490,111.147299,13.109297
3632,3633,111.029284,13.191783
1004,1005,111.180201,13.190725
12785,12786,111.126705,13.15766


In [9]:
# primeiro com um subset
sub_df = df.sample(100)

In [10]:
sub_df

Unnamed: 0,bs,lon,lat
1049,1050,111.083936,13.125975
8077,8078,111.082806,13.056710
11313,11314,111.117735,13.042483
1594,1595,111.226733,13.302926
11301,11302,111.183952,13.071945
...,...,...,...
7180,7181,111.094942,13.170786
2223,2224,111.029322,13.197419
4522,4523,111.085333,13.107244
709,710,111.188202,13.036661


In [11]:
def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between two points 
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a)) 
    r = 6371 # Radius of earth in kilometers. Use 3956 for miles
    return c * r

In [12]:
def create_dist_df(df):
    df.reset_index(drop=True, inplace=True)

    dist_dict = {"from": [], "to": [], "distance": []}
    dist_matrix = np.zeros([len(df), len(df)])

    for i in df.itertuples():
        idx_i, bs_from, lat_i, lon_i = i[0], i[1], i[2], i[3]
    
        for j in df.itertuples():
            idx_j, bs_to, lat_j, lon_j = j[0], j[1], j[2], j[3]
            dist_dict["from"].append(bs_from)
            dist_dict["to"].append(bs_to)
            dist_dict["distance"].append(haversine(lon_i, lat_i, lon_j, lat_j))

    
    return pd.DataFrame.from_dict(dist_dict)

In [14]:
dist_matrix = create_dist_df(sub_df) 

In [20]:
dist_matrix

Unnamed: 0,from,to,distance
0,1050,1050,0.000000
1,1050,8078,2.773423
2,1050,11314,5.029469
3,1050,1595,17.393810
4,1050,11302,11.330261
...,...,...,...
9995,1947,7181,1.300569
9996,1947,2224,6.211422
9997,1947,4523,2.190852
9998,1947,710,12.657544


In [22]:
dist_matrix.to_csv('../sensor_distance.csv', index=False)

In [19]:
sub_df['bs'].values

array([ 1050,  8078, 11314,  1595, 11302,  1295,   344, 12339, 11952,
       11403,  4579,  4721,  9956,  5292,  4304,  5537,  3196, 11975,
       10297,  7721,  2677,  9401, 11959,  8576,  5804,  4671,  8904,
        7738,  1956, 11482,  2576,   780, 10698,  7918,  5112, 11864,
        5660,  3697,  3232,  6361,  1582,  2239,  9963,  9256,  4988,
       11300, 12500,  6541,  2213,  4281,  9227,  5723, 11334,  6121,
         314,  8421,   588,  6366, 12919,  3471, 10781,  9355,  4161,
        6197,    47,  6953, 10772, 11286, 12895,  3842,  9668, 10104,
        5522, 10741,  3719,  9278,  9128,  6759,  2429,   409, 11156,
        1745, 11894,  2685, 12910, 10738,  5654, 11990,  2038,  4587,
       13009,  8181,  4180,  2392, 10111,  7181,  2224,  4523,   710,
        1947])