<a href="https://colab.research.google.com/github/nepomucenoc/modelagem/blob/main/distance_matrix_lat_lng.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Distance matrix using latitude and longitude

**The approaches below compute the pairwise great-circle (haversine) distances between geographic points given as latitude/longitude coordinates.**

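The result is in kilometres because the Earth's radius is expressed in km (`R = 6373.0`). If you prefer the result in another unit, express R in that unit instead — for example, `R = 3958.8` for miles. (Note that 3958.8 mi corresponds to a radius of about 6371 km, while 6373 km is about 3960 mi, so the two constants differ by roughly 0.03 %.)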

In [1]:
import pandas as pd

import numpy as np
from math import sin, cos, sqrt, atan2, radians

from scipy.spatial.distance import pdist, squareform

In [7]:
# Sample points, one (latitude, longitude) pair per row, in decimal degrees
# (the distance functions below convert them with `radians`).
lat_long = pd.DataFrame(
    [
        (-22.98, -43.19),
        (-22.97, -43.39),
        (-22.92, -43.24),
        (-22.87, -43.28),
        (-22.89, -43.67),
    ],
    columns=['LATITUDE', 'LONGITUDE'],
)


In [8]:
def dist(x, y, radius=6373.0):
    """Return the haversine (great-circle) distance between two points.

    Args:
        x: Indexable whose items 0 and 1 are the latitude and longitude of
            the first point, in decimal degrees.
        y: Second point, same layout as ``x``.
        radius: Radius of the sphere. The result is expressed in the same
            unit; the default of 6373.0 gives kilometres.

    Returns:
        The distance along the sphere, rounded to 4 decimal places.
    """
    lat1, lon1 = radians(x[0]), radians(x[1])
    lat2, lon2 = radians(y[0]), radians(y[1])

    dlat = lat2 - lat1
    dlon = lon2 - lon1

    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Floating-point rounding can leave `a` a hair outside [0, 1] for
    # (near-)antipodal points; without the clamp sqrt(1 - a) would raise
    # ValueError (math domain error).
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return round(radius * c, 4)

In [9]:
# Condensed pairwise distances: one entry per unordered pair of rows, each
# computed by the custom `dist` metric on a (latitude, longitude) row.
distances = pdist(lat_long.to_numpy(), metric=dist)
distances

array([20.5115,  8.4123, 15.3203, 50.1784, 16.34  , 15.8341, 30.0319,
        6.9086, 44.1838, 40.0284])

In [10]:
# Human-readable labels, numbered from 1, one per row of `lat_long`.
n_rows = lat_long.shape[0]
points = [f'point_{i}' for i in range(1, n_rows + 1)]
points

['point_1', 'point_2', 'point_3', 'point_4', 'point_5']

In [11]:
# Expand the condensed distance vector into a full symmetric matrix and
# label both axes with the point names.
result = pd.DataFrame(
    data=squareform(distances),
    index=points,
    columns=points,
)
result

Unnamed: 0,point_1,point_2,point_3,point_4,point_5
point_1,0.0,20.5115,8.4123,15.3203,50.1784
point_2,20.5115,0.0,16.34,15.8341,30.0319
point_3,8.4123,16.34,0.0,6.9086,44.1838
point_4,15.3203,15.8341,6.9086,0.0,40.0284
point_5,50.1784,30.0319,44.1838,40.0284,0.0


## Another possible solution

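Only the upper triangle of the distance matrix is computed; the lower triangle is left filled with zeros because the matrix is symmetric (the distance from point i to point j equals the distance from point j to point i).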

In [2]:
# Same sample coordinates (decimal degrees), rebuilt here so this section
# can be run on its own.
lat_long = pd.DataFrame(
    dict(
        LATITUDE=[-22.98, -22.97, -22.92, -22.87, -22.89],
        LONGITUDE=[-43.19, -43.39, -43.24, -43.28, -43.67],
    )
)
lat_long

Unnamed: 0,LATITUDE,LONGITUDE
0,-22.98,-43.19
1,-22.97,-43.39
2,-22.92,-43.24
3,-22.87,-43.28
4,-22.89,-43.67


In [3]:
# Positional slice: every row from position 2 onward, all columns.
test = lat_long.iloc[2:]
test

Unnamed: 0,LATITUDE,LONGITUDE
2,-22.92,-43.24
3,-22.87,-43.28
4,-22.89,-43.67


In [4]:
def distance(city1, city2, radius=6373.0):
    """Return the haversine (great-circle) distance between two points.

    Args:
        city1: Mapping-like object (e.g. a DataFrame row or a dict) with
            'LATITUDE' and 'LONGITUDE' entries in decimal degrees.
        city2: Second point, same layout as ``city1``.
        radius: Radius of the sphere. The result is expressed in the same
            unit; the default of 6373.0 gives kilometres.

    Returns:
        The unrounded distance along the sphere.
    """
    lat1 = radians(city1['LATITUDE'])
    lon1 = radians(city1['LONGITUDE'])
    lat2 = radians(city2['LATITUDE'])
    lon2 = radians(city2['LONGITUDE'])

    dlat = lat2 - lat1
    dlon = lon2 - lon1

    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Floating-point rounding can leave `a` a hair outside [0, 1] for
    # (near-)antipodal points; without the clamp sqrt(1 - a) would raise
    # ValueError (math domain error).
    a = min(1.0, max(0.0, a))
    central_angle = 2 * atan2(sqrt(a), sqrt(1 - a))

    return radius * central_angle

In [12]:
# Upper-triangular distance matrix: entry [i1, i2] (i1 < i2) holds the
# distance between rows i1 and i2; the diagonal and lower triangle stay 0.
#
# The array is named `dist_matrix` rather than `dist` so that it does not
# overwrite the haversine function `dist` defined earlier in the notebook
# (which would break re-running the `pdist` cell).
n_points = lat_long.shape[0]
dist_matrix = np.zeros([n_points, n_points])

# Iterate over row positions instead of index labels so the result is
# correct even when `lat_long` does not have a default 0..n-1 index.
for i1 in range(n_points):
    city1 = lat_long.iloc[i1]
    for i2 in range(i1 + 1, n_points):
        dist_matrix[i1, i2] = distance(city1, lat_long.iloc[i2])

dist_matrix

array([[ 0.        , 20.51149047,  8.41230771, 15.32026132, 50.17836849],
       [ 0.        ,  0.        , 16.33997119, 15.83407186, 30.03192954],
       [ 0.        ,  0.        ,  0.        ,  6.90864606, 44.18376436],
       [ 0.        ,  0.        ,  0.        ,  0.        , 40.02842872],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ]])