In [6]:
import pandas as pd
import os

def getTaggedPoints(csv_path):
    df = pd.read_csv(csv_path)
    tagged_points = []
    lat_key, lng_key, id_key, type_key = '', '', '', ''
    for key in list(df):
        key_lower = key.lower()
        if lat_key == '':
            if 'lat' in key_lower or key_lower == 'latitude' or 'features__geometry__coordinates__002' in key_lower or key_lower == 'x':
                lat_key = key
        if lng_key == '':
            if 'lon' in key_lower or key_lower == 'longitude' or 'features__geometry__coordinates__001' in key_lower or key_lower == 'y':
                lng_key = key
        if id_key == '':
            if 'id' in key_lower[-2:] or ' id' in key_lower:
                id_key = key
        if type_key == '':
            if 'type' in key_lower:
                type_key = key
        else:
            pass

    for i in range(len(df)):
        # Appends a tuple of (id, (lat, lng)) to the list of points

        
        if np.isnan(df[lat_key][i]) or np.isnan(df[lng_key][i]):
            id = "Error: Missing coordinate data"
            coors = (0.0, 0.0)
            tagged_points.append( (id, coors, 'Unknown') )
        else:
            if type_key == '':
                id = str(df[id_key][i])
                coors = (df[lat_key][i], df[lng_key][i])
                tagged_points.append( (id, coors) )
            else:
                id = str(df[id_key][i])
                type_of = str(df[type_key][i])
                coors = (df[lat_key][i], df[lng_key][i])
                tagged_points.append( (id, coors, type_of) )
                
    return tagged_points
        

In [7]:
from nn_utils import *

src_sites_filepath = "country_data/jor_schools.csv"
tgt_sites_filepath = "country_data/jor_healthsites.csv"

updated_filename = os.path.splitext(src_sites_filepath)[0].split('/')[1]
try:
    tgt_name = tgt_sites_filepath.split('_')[2].split('.')[0]
except:
    try:
        tgt_name = tgt_sites_filepath.split('-')[2].split('.')[0]
    except:
        tgt_name = "target site"

print("Retrieving latitude longitude vectors from " + src_sites_filepath + " and " + tgt_sites_filepath + " ... ")
src_vectors = [tagged_vector[1] for tagged_vector in getTaggedPoints(src_sites_filepath)] 
tgt_tagged_vectors = getTaggedPoints(tgt_sites_filepath)

print("Determining distances to nearest target sites ... ")
# Uses balltree implementation to return distances to neighbors in meters, calculated with haversine
nn_dists_and_indices = get_nearest_neighbors(src_vectors,
                                         [tagged_vector[1] for tagged_vector in tgt_tagged_vectors])

nearest_sites_data = []
for di in nn_dists_and_indices:
    print(di)
    tgt = tgt_tagged_vectors[di[1]]
    tgt_id = tgt_tagged_vectors[di[1]][0]
    if len(tgt) > 2:
        tgt_type = tgt_tagged_vectors[di[1]][2]
        nearest_sites_data.append( (di[0], tgt_id, tgt_type) )
    else:
        nearest_sites_data.append( (di[0], tgt_id) )

print(nearest_sites_data)
print("Generating updated .csv ... ")
# makeUpdatedCsv(nn_dists, "Straight line distance to nearest " + tgt_name, src_sites_filepath, "output_data/" + updated_filename + "_updated.csv")
# print("Done. Exiting... ")

Retrieving latitude longitude vectors from country_data/jor_schools.csv and country_data/jor_healthsites.csv ... 
Determining distances to nearest target sites ... 
[0.55769787 0.62761822]
(1808.537029071564, 29)
(1929.0138585147197, 29)
(1000.76219804146, 37)
(838.2052953575427, 37)
(392.63321217714804, 90)
(458.91963047417454, 37)
(751.8870127861173, 41)
(996.0333269805635, 3)
(1346.1324063036002, 2)
(797.3708076485616, 37)
(599.0356317626073, 41)
(865.9825234689814, 37)
(495.95192424971174, 8)
(813.3343617736933, 37)
(447.91458787346005, 86)
(513.839996563466, 3)
(1738.5526027901399, 29)
(817.7378588740451, 3)
(545.4131603867806, 90)
(1274.3174687693825, 3)
(1900.2116966438782, 29)
(742.0902016793814, 2)
(815.142233006379, 37)
(524.8308560597892, 37)
(1442.3065576370625, 3)
(1045.5537238834968, 3)
(884.9704655271154, 37)
(1370.9186407072477, 3)
(943.8543414327776, 37)
(546.2184715751052, 37)
(1599.1384268164138, 3)
(959.9547550784296, 37)
(1101.356985443329, 3)
(416.11198580043697, 

(1154.9876639741856, 74)
(923.4443554255024, 67)
(113.3838004003262, 64)
(1387.8356525421568, 67)
(923.4443554255024, 67)
(1685.1631054695913, 85)
(751.8859727189016, 64)
(113.3838004003262, 64)
(3227.944298031448, 64)
(22093.84335098563, 18)
(12683.88106603098, 0)
(19360.323096428958, 64)
(19148.195839376996, 64)
(5708.307207717874, 64)
(8191.061355328849, 44)
(16588.813157060413, 74)
(17482.889145597957, 46)
(18519.488851079663, 46)
(6827.563289544802, 74)
(17790.34326840527, 77)
(9300.633646524664, 44)
(19894.42380768903, 22)
(18335.68755107197, 74)
(5597.022091901593, 64)
(14163.524497513456, 74)
(8421.291473589275, 91)
(4913.544619799541, 0)
(14191.730380124072, 74)
(16425.048932007383, 46)
(14334.26477643364, 74)
(10795.084424226872, 0)
(2991.4043887204557, 64)
(6734.795908890922, 0)
(14546.198537299078, 74)
(11203.466723163894, 64)
(23733.18445978104, 77)
(6009.167528910632, 91)
(8948.443845062231, 74)
(9063.927636367784, 0)
(27302.78821198215, 18)
(18037.64081658726, 64)
(17598

(16103.101026631655, 77)
(9406.403494313092, 60)
(4008.968559618055, 60)
(15708.321139452431, 77)
(18974.011229259533, 77)
(27656.75573312135, 77)
(10263.81293826869, 77)
(21983.44470563135, 77)
(5022.955405988358, 68)
(11111.265185791688, 77)
(14110.374305203943, 77)
(25388.507981001967, 77)
(40627.02290639879, 77)
(4302.111213343943, 68)
(8467.281404842637, 68)
(2686.262655250219, 77)
(4932.130495824532, 77)
(7669.003991674262, 77)
(8534.978642206954, 68)
(25234.559189369902, 77)
(1269.1240502341973, 68)
(8500.541523750944, 77)
(58936.237142349695, 18)
(4432.083491817749, 60)
(4890.768523349182, 68)
(9538.40744255493, 60)
(3464.217539143179, 21)
(18394.089760091196, 77)
(25205.39566423004, 77)
(5155.096720430959, 68)
(8589.564374312007, 68)
(27497.308497121063, 77)
(8728.497856289094, 68)
(22321.878390631333, 77)
(22686.64444151756, 77)
(8227.894367550065, 77)
(25239.137596856457, 77)
(164189.74137480042, 18)
(17604.419588025892, 77)
(17042.203898425145, 77)
(44589.68906499313, 77)
(

(1050.4144240164499, 90)
(1149.8760710229456, 90)
(1149.8760710229456, 90)
(665.6337726002353, 90)
(1162.0943424857717, 90)
(1162.0943424857717, 90)
(665.6337726002353, 90)
(744.8051249811502, 37)
(744.8051249811502, 37)
(1020.5960047865697, 37)
(1020.5960047865697, 37)
(2798.383490380625, 80)
(2173.722913441513, 80)
(2123.236894453, 78)
(960.0216421946408, 57)
(1395.9472391480508, 82)
(1395.9472391480508, 82)
(1254.5723916879572, 82)
(1242.1006561520028, 82)
(1176.259229642454, 48)
(536.1763539759736, 37)
(1176.259229642454, 48)
(476.93519016910454, 48)
(476.93519016910454, 48)
(705.666361733375, 48)
(786.7971477426479, 48)
(786.7971477426479, 48)
(674.898122697184, 48)
(689.762191702174, 48)
(705.666361733375, 48)
(319.2306183830745, 3)
(319.2306183830745, 3)
(1598.0782255493518, 29)
(802.8517537956661, 3)
(1781.3889627399092, 71)
(1598.0782255493518, 29)
(307.04965748882506, 24)
(1502.1795797900056, 71)
(908.130739840545, 37)
(307.04965748882506, 24)
(1241.3887183502711, 90)
(1241.3

(3067.3124150308577, 61)
(354.1065956096347, 61)
(1838.792309835317, 74)
(2440.325347122176, 67)
(11859.613381420659, 11)
(2419.3085324860735, 82)
(2528.039880544121, 79)
(2045.379682151347, 0)
(28826.283798377895, 69)
(3030.6779863487054, 26)
(3492.3813891091813, 26)
(144.02988874060557, 26)
(14939.460121150609, 61)
(7732.16803867859, 61)
(161.97116740356125, 23)
(161.97116740356125, 23)
(2677.454913947276, 61)
(3130.746102132183, 84)
(1848.6716177382234, 34)
(1495.1023125816134, 57)
(3361.8828305192205, 84)
(10381.714640386574, 1)
(4448.430126143742, 3)
(4448.430126143742, 3)
(698.9676486866476, 90)
(698.9676486866476, 90)
(3021.084134820811, 14)
(698.9676486866476, 90)
(698.9676486866476, 90)
(1250.3591498392746, 92)
(2180.091036128508, 85)
(2093.469628161203, 74)
(1774.2564998617577, 74)
(698.9676486866476, 90)
(8990.382552101255, 1)
(15717.739432827362, 44)
(12753.999502576118, 84)
(992.2892557590176, 55)
(7200.683308081549, 80)
(8848.067406764141, 61)
(668.3137216197393, 37)
(465

In [13]:
def makeUpdatedCsv(data, column_names, infile, outfile):
    df = pd.read_csv(infile)
    i = 0
    while i < len(column_names):
        col_name = column_names[i]
        column_data = []
        for j in range(len(data)):
            column_data.append(data[j][i])
        df[col_name] = column_data
        i += 1
    df.to_csv(outfile)

makeUpdatedCsv(nearest_sites_data, ['distance', 'id', 'type'],
               'country_data/jor_schools.csv', 'example.csv')
