## Calculating distance between locations

Uses the Haversine formula to compute the great-circle distance (in kilometres) between pairs of latitude/longitude coordinates.

In [2]:
import pandas as pd
import numpy as np

from math import cos, asin, sqrt, pi
from scipy.spatial.distance import pdist
from scipy.spatial.distance import squareform

import warnings
# Silence every warning category for the remainder of the session so the
# cell output stays uncluttered.
warnings.filterwarnings(action='ignore')

EARTHRADIUS = 6371.0  # sphere radius used by get_distance, in kilometres


def get_distance(row1, row2):
    """
    Return the Haversine (great-circle) distance between two points.

    Parameters
    ----------
    row1, row2 : sequence
        Indexable objects holding latitude at position 0 and longitude
        at position 1, both in decimal degrees.

    Returns
    -------
    float
        Distance in kilometres on a sphere of radius ``EARTHRADIUS``.
        NaN is returned when any coordinate is NaN.
    """
    lat1, lon1 = row1[0], row1[1]
    lat2, lon2 = row2[0], row2[1]

    p = pi / 180  # degrees -> radians conversion factor

    # Haversine term written with cosines only: hav(dlat) + cos*cos*hav(dlon).
    a = (
        0.5
        - cos((lat2 - lat1) * p) / 2
        + cos(lat1 * p) * cos(lat2 * p) * (1 - cos((lon2 - lon1) * p)) / 2
    )

    # Rounding error can push `a` marginally outside [0, 1] (for example
    # with near-antipodal points), which would make sqrt/asin raise
    # ValueError.  Comparisons are used instead of min/max so that a NaN
    # value is left untouched and still propagates to the result.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0

    return 2 * EARTHRADIUS * asin(sqrt(a))

### Using GEOID as row and column labels

In [3]:
# Load the source table; later cells read the INTPTLAT10, INTPTLON10,
# GEOID10 and NAME10 columns from it.
data = pd.read_csv(filepath_or_buffer='data/USDATA.csv')

In [3]:
# Build the full pairwise distance matrix, labelled by GEOID10 on both axes.
# 1) pull the latitude/longitude pairs out as a plain 2-column array,
# 2) let pdist evaluate get_distance on every unordered pair of rows,
# 3) expand the condensed result into a square symmetric matrix.
geoid_coords = data[['INTPTLAT10', 'INTPTLON10']].values
geoid_condensed = pdist(geoid_coords, get_distance)
dist_matrix = pd.DataFrame(
    squareform(geoid_condensed),
    index=data['GEOID10'],
    columns=data['GEOID10'],
)

In [9]:
# Discard every row and column whose GEOID10 label is missing.
geoid_rows_present = dist_matrix.index.notna()
geoid_cols_present = dist_matrix.columns.notna()
dist_matrix = dist_matrix.loc[geoid_rows_present, geoid_cols_present]

In [10]:
# Persist the GEOID-labelled distance matrix (index column included).
dist_matrix.to_csv(path_or_buf='data/dist_matrix_geoid.csv')

### Using name as row and column labels

In [5]:
# Same pairwise distance matrix as before, but labelled by NAME10 on both
# axes instead of GEOID10.
name_coords = data[['INTPTLAT10', 'INTPTLON10']].values
name_condensed = pdist(name_coords, get_distance)
dist_matrix_name = pd.DataFrame(
    squareform(name_condensed),
    index=data['NAME10'],
    columns=data['NAME10'],
)

In [6]:
# Discard every row and column whose NAME10 label is missing.
name_rows_present = dist_matrix_name.index.notna()
name_cols_present = dist_matrix_name.columns.notna()
dist_matrix_name = dist_matrix_name.loc[name_rows_present, name_cols_present]

In [8]:
# Persist the name-labelled distance matrix (index column included).
dist_matrix_name.to_csv(path_or_buf='data/dist_matrix_name.csv')